def import_concepts(reference_data):
    """
    Save the concepts, concept values and concept relations found in
    ``reference_data``.

    ``reference_data`` is read positionally:

    * ``reference_data[0]['concepts']`` -- iterable of dicts with the keys
      ``conceptid``, ``nodetype`` and ``legacyoid``
    * ``reference_data[1]['values']`` -- iterable of dicts with the keys
      ``valueid``, ``conceptid``, ``valuetype``, ``value`` and ``languageid``
    * ``reference_data[2]['relations']`` -- iterable of dicts with the keys
      ``conceptidfrom``, ``conceptidto`` and ``relationtype``

    A value type that is not yet present in ``models.DValueType`` is created
    with category ``'undefined'`` and namespace ``'arches'`` before the value
    itself is saved.  A relation is only added when both of its concepts were
    part of this import; any other relation is skipped without an error.

    """
    concepts = reference_data[0]['concepts']
    values = reference_data[1]['values']
    relations = reference_data[2]['relations']

    # keep the saved concepts around, keyed by id, so relations can be wired up below
    concept_objs = {}
    for concept in concepts:
        concept_obj = Concept()
        concept_obj.id = concept['conceptid']
        concept_obj.nodetype = concept['nodetype']
        concept_obj.legacyoid = concept['legacyoid']
        concept_obj.save()
        concept_objs[concept_obj.id] = concept_obj

    # a set gives constant-time membership tests; the previous list was scanned once per value
    existing_valuetypes = set(o.valuetype for o in models.DValueType.objects.all())
    for value in values:
        valuetype = value['valuetype']
        if valuetype not in existing_valuetypes:
            models.DValueType.objects.create(valuetype=valuetype, category='undefined', namespace='arches')
            existing_valuetypes.add(valuetype)

        conceptvalue_obj = ConceptValue()
        conceptvalue_obj.id = value['valueid']
        conceptvalue_obj.conceptid = value['conceptid']
        conceptvalue_obj.type = valuetype
        conceptvalue_obj.value = value['value']
        conceptvalue_obj.language = value['languageid']
        conceptvalue_obj.save()

    for relation in relations:
        # relations pointing at concepts outside of this import are ignored
        if relation['conceptidfrom'] in concept_objs and relation['conceptidto'] in concept_objs:
            conceptfrom = concept_objs[relation['conceptidfrom']]
            conceptto = concept_objs[relation['conceptidto']]
            conceptfrom.add_relation(conceptto, relation['relationtype'])
def test_create_concept(self):
    """
    Round-trip a Concept through create, read, update and delete

    """

    def fetch(conceptid):
        # always read through a brand new Concept instance
        return Concept().get(id=conceptid)

    created = Concept()
    created.nodetype = 'Concept'
    created.values = [
        ConceptValue({
            # no 'id' or 'conceptid' key is supplied for the new value
            'type': 'prefLabel',
            'category': 'label',
            'value': 'test pref label',
            'language': 'en-US'
        })
    ]
    created.save()

    # create + read
    fetched = fetch(created.id)
    self.assertEqual(fetched.id, created.id)
    self.assertEqual(fetched.values[0].value, 'test pref label')

    # update
    pref_label = created.values[0]
    pref_label.value = 'updated pref label'
    created.values[0] = pref_label
    created.save()

    fetched = fetch(created.id)
    self.assertEqual(fetched.values[0].value, 'updated pref label')

    # delete: a later lookup of the same id has to fail
    fetched.delete(delete_self=True)
    with self.assertRaises(models.Concept.DoesNotExist):
        fetch(fetched.id)
def get_concepts(self, uris):
    """
    Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

    Arguments:
    uris -- a single string holding one or more uris separated by commas

    For every uri the prefLabel and scopeNote values are requested, restricted
    to the languages in self.allowed_languages, and collected on a new Concept.
    The uri itself is added to that concept as a dcterms identifier value.

    Returns a list with one Concept per uri.

    Raises an Exception as soon as a uri yields no results; the message is an
    HTML fragment that embeds the (HTML-escaped) query that was sent.

    """

    dcterms_identifier_type = DValueType.objects.get(
        valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''),
        namespace='dcterms')

    # the AAT expects language codes to be all lower case
    # (the list does not depend on the uri, so it is built once up front)
    langs = ','.join('"%s"' % (lang.lower()) for lang in self.allowed_languages)

    concepts = []
    for uri in uris.split(','):
        query = (
            " SELECT ?value ?type WHERE { "
            "{ <%s> skos:prefLabel ?value . BIND('prefLabel' AS ?type) } "
            "UNION "
            "{ <%s> skos:scopeNote [rdf:value ?value] . BIND('scopeNote' AS ?type) } "
            "FILTER (lang(?value) in (%s)) }"
        ) % (uri, uri, langs)

        bindings = self.perform_sparql_query(query)["results"]["bindings"]
        if not bindings:
            # The query is shown inside <pre> in an HTML message, so angle
            # brackets must be turned into entities; otherwise every <uri>
            # term would be interpreted as a tag instead of being displayed.
            escaped_query = query.replace('<', '&lt;').replace('>', '&gt;')
            raise Exception(
                _(
                    "<strong>Error in SPARQL query:</strong><br>"
                    "Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at "
                    "<a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a>"
                    "<i><pre>%s</pre></i>"
                    "Query returned 0 results, please check the query for errors.  "
                    "You may need to add the appropriate languages into the database for this query to work<br><br>"
                ) % (escaped_query))

        concept = Concept()
        concept.nodetype = 'Concept'
        for result in bindings:
            concept.addvalue({
                'type': result["type"]["value"],
                'value': result["value"]["value"],
                'language': result["value"]["xml:lang"]
            })
        # remember which uri this concept was built from
        concept.addvalue({
            'value': uri,
            'language': settings.LANGUAGE_CODE,
            'type': dcterms_identifier_type.valuetype,
            'category': dcterms_identifier_type.category
        })
        concepts.append(concept)

    return concepts
def get_concepts(self, uris):
    """
    Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

    Arguments:
    uris -- a single string holding one or more uris separated by commas

    For every uri the prefLabel and scopeNote values are requested, restricted
    to the languages in self.allowed_languages, and collected on a new Concept.

    Returns a list with one Concept per uri.

    Raises an Exception as soon as a uri yields no results; the message is an
    HTML fragment that embeds the (HTML-escaped) query that was sent.

    """

    # the AAT expects language codes to be all lower case
    # (the list does not depend on the uri, so it is built once up front)
    langs = ','.join('"%s"' % (lang.lower()) for lang in self.allowed_languages)

    concepts = []
    for uri in uris.split(','):
        query = (
            " SELECT ?value ?type WHERE { "
            "{ <%s> skos:prefLabel ?value . BIND('prefLabel' AS ?type) } "
            "UNION "
            "{ <%s> skos:scopeNote [rdf:value ?value] . BIND('scopeNote' AS ?type) } "
            "FILTER (lang(?value) in (%s)) }"
        ) % (uri, uri, langs)

        bindings = self.perform_sparql_query(query)["results"]["bindings"]
        if not bindings:
            # The query is shown inside <pre> in an HTML message, so angle
            # brackets must be turned into entities; otherwise every <uri>
            # term would be interpreted as a tag instead of being displayed.
            escaped_query = query.replace('<', '&lt;').replace('>', '&gt;')
            raise Exception(
                _(
                    "<strong>Error in SPARQL query:</strong><br>"
                    "Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at "
                    "<a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a>"
                    "<i><pre>%s</pre></i>"
                    "Query returned 0 results, please check the query for errors.  "
                    "You may need to add the appropriate languages into the database for this query to work<br><br>"
                ) % (escaped_query))

        concept = Concept()
        concept.nodetype = 'Concept'
        for result in bindings:
            concept.addvalue({
                'type': result["type"]["value"],
                'value': result["value"]["value"],
                'language': result["value"]["xml:lang"]
            })
        concepts.append(concept)

    return concepts
def import_concepts(reference_data):
    """
    Save the concepts, concept values and concept relations found in
    ``reference_data``.

    ``reference_data`` is read positionally:

    * ``reference_data[0]['concepts']`` -- iterable of dicts with the keys
      ``conceptid``, ``nodetype`` and ``legacyoid``
    * ``reference_data[1]['values']`` -- iterable of dicts with the keys
      ``valueid``, ``conceptid``, ``valuetype``, ``value`` and ``languageid``
    * ``reference_data[2]['relations']`` -- iterable of dicts with the keys
      ``conceptidfrom``, ``conceptidto`` and ``relationtype``

    A value type that is not yet present in ``models.DValueType`` is created
    with category ``'undefined'`` and namespace ``'arches'`` before the value
    itself is saved.  A relation is only added when both of its concepts were
    part of this import; any other relation is skipped without an error.

    """
    concepts = reference_data[0]['concepts']
    values = reference_data[1]['values']
    relations = reference_data[2]['relations']

    # keep the saved concepts around, keyed by id, so relations can be wired up below
    concept_objs = {}
    for concept in concepts:
        concept_obj = Concept()
        concept_obj.id = concept['conceptid']
        concept_obj.nodetype = concept['nodetype']
        concept_obj.legacyoid = concept['legacyoid']
        concept_obj.save()
        concept_objs[concept_obj.id] = concept_obj

    # a set gives constant-time membership tests; the previous list was scanned once per value
    existing_valuetypes = set(
        o.valuetype for o in models.DValueType.objects.all()
    )
    for value in values:
        valuetype = value['valuetype']
        if valuetype not in existing_valuetypes:
            models.DValueType.objects.create(valuetype=valuetype,
                                             category='undefined',
                                             namespace='arches')
            existing_valuetypes.add(valuetype)

        conceptvalue_obj = ConceptValue()
        conceptvalue_obj.id = value['valueid']
        conceptvalue_obj.conceptid = value['conceptid']
        conceptvalue_obj.type = valuetype
        conceptvalue_obj.value = value['value']
        conceptvalue_obj.language = value['languageid']
        conceptvalue_obj.save()

    for relation in relations:
        # relations pointing at concepts outside of this import are ignored
        if relation['conceptidfrom'] in concept_objs and relation[
                'conceptidto'] in concept_objs:
            conceptfrom = concept_objs[relation['conceptidfrom']]
            conceptto = concept_objs[relation['conceptidto']]
            conceptfrom.add_relation(conceptto, relation['relationtype'])
def test_create_concept(self):
    """
    Exercise create, read, update and delete on a single Concept

    """

    # -- create ---------------------------------------------------------
    original = Concept()
    original.nodetype = "Concept"
    # the value is built without an "id" or "conceptid" entry
    pref_label_value = ConceptValue(
        {
            "type": "prefLabel",
            "category": "label",
            "value": "test pref label",
            "language": "en-US",
        }
    )
    original.values = [pref_label_value]
    original.save()

    # -- read -----------------------------------------------------------
    reloaded = Concept().get(id=original.id)
    self.assertEqual(reloaded.id, original.id)
    self.assertEqual(reloaded.values[0].value, "test pref label")

    # -- update ---------------------------------------------------------
    edited = original.values[0]
    edited.value = "updated pref label"
    original.values[0] = edited
    original.save()

    reloaded = Concept().get(id=original.id)
    self.assertEqual(reloaded.values[0].value, "updated pref label")

    # -- delete ---------------------------------------------------------
    reloaded.delete(delete_self=True)
    with self.assertRaises(models.Concept.DoesNotExist):
        # the result is irrelevant, the lookup itself is expected to raise
        Concept().get(id=reloaded.id)
def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    """
    Load one authority document csv file (plus its optional '.values.csv'
    companion) into concepts, save and index those concepts and insert the
    relations between them.

    Arguments:
    cursor -- object whose execute() method is called with one INSERT
        statement per collected concept relation
    path_to_authority_files -- directory that is joined with filename to
        locate the csv file
    filename -- name of the authority document csv file
    auth_file_to_entity_concept_mapping -- mapping keyed by authority file
        name; each entry is iterated and read for 'ENTITYTYPE_CONCEPTID'

    Returns a list of error message strings (empty when nothing went wrong).
    Problems are collected in that list rather than raised; only the
    index() call on each concept sits outside of a try block.

    NOTE: this is Python 2 code (print statement, sys.maxint, rows.next()).

    """
    print filename.upper()
    start = time()
    value_types = models.ValueTypes.objects.all()
    filepath = os.path.join(path_to_authority_files, filename)
    unicodecsv.field_size_limit(sys.maxint)
    errors = []
    lookups = Lookups()

    #create nodes for each authority document file and relate them to the authority document node in the concept schema
    auth_doc_file_name = str(filename)
    # display name: underscores become spaces, the 'AUTHORITY DOCUMENT.csv' suffix is dropped and words are capitalized
    display_file_name = string.capwords(auth_doc_file_name.replace('_',' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        # regular authority file: create a new top concept with a fresh uuid
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value':display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        # presumably ...0001 is the authority document node mentioned above -- TODO confirm
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)
    else:
        # the cross-reference relationship types file reuses an already existing concept with a fixed id
        top_concept = Concept().get(id = '00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    # register the top concept under row 0 so rows can look it up as their parent
    lookups.add_lookup(concept=top_concept, rownum=0)

    # ---- pass 1: the concept rows of the authority file ----
    try:
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','PREFLABEL','ALTLABELS','PARENTCONCEPTID','CONCEPTTYPE','PROVIDER'], encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            rows.next() # skip header row
            for row in rows:
                try:
                    # NOTE(review): 'MISSING' is passed as restval (a fill-in *value*), while
                    # this tests for a *key* named 'MISSING' (assuming row is a plain dict) --
                    # looks like this branch may never be taken; confirm.
                    if 'MISSING' in row:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (row['MISSING']))
                    else:
                        legacyoid = row[u'CONCEPTID']
                        concept = Concept()
                        # keep the id from the file when it already is a uuid, otherwise generate one
                        concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                        concept.nodetype = 'Concept'# if row[u'CONCEPTTYPE'].upper() == 'INDEX' else 'Collection'
                        concept.legacyoid = row[u'CONCEPTID']
                        concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                        # collectors get the preferred label a second time, typed as 'collector'
                        if row['CONCEPTTYPE'].lower() == 'collector':
                            concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                        # alternative labels are separated by semicolons
                        if row[u'ALTLABELS'] != '':
                            altlabel_list = row[u'ALTLABELS'].split(';')
                            for altlabel in altlabel_list:
                                concept.addvalue({'value':altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})

                        # the parent has to be registered already (the top concept or an earlier row)
                        parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                        lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                        # don't add a member relationship between a top concept and its children
                        if parent_concept_id != top_concept.id:
                            lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)

                        # add the member relationship from the E55 type (typically) to their top members
                        if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                            for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                                lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                        # NOTE(review): this validation runs after the relationships above were
                        # recorded, so a rejected row is not added to the lookup but its
                        # relationships stay queued -- confirm that this is intended.
                        if row[u'PARENTCONCEPTID'] == '' or (row[u'CONCEPTTYPE'].upper() != 'INDEX' and row[u'CONCEPTTYPE'].upper() != 'COLLECTOR'):
                            raise Exception('The row has invalid values.')

                        lookups.add_lookup(concept=concept, rownum=rows.line_num)

                except Exception as e:
                    # a bad row is reported and skipped, the remaining rows are still processed
                    errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    # put a header in front of whatever has been collected so far
    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    # ---- pass 2: additional values from the '.values.csv' companion file ----
    try:
        # try and open the values file if it exists
        if exists(filepath.replace('.csv', '.values.csv')):
            with open(filepath.replace('.csv', '.values.csv'), 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','VALUE','VALUETYPE','PROVIDER'], encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                rows.next() # skip header row
                for row in rows:
                    try:
                        # surplus fields are collected under the restkey 'ADDITIONAL'
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s. Add quotes to values that have commas in them.' % (row['ADDITIONAL']))
                        else:
                            row_valuetype = row[u'VALUETYPE'].strip()
                            # unknown value types are created on the fly as 'undefined' / 'arches'
                            if row_valuetype not in value_types.values_list('valuetype', flat=True):
                                valuetype = models.ValueTypes()
                                valuetype.valuetype = row_valuetype
                                valuetype.category = 'undefined'
                                valuetype.namespace = 'arches'
                                valuetype.save()
                                value_types = models.ValueTypes.objects.all()

                            concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                            category = value_types.get(valuetype=row_valuetype).category
                            # note: 'type' is the raw column value, not the stripped row_valuetype
                            concept.addvalue({'value':row[u'VALUE'], 'type': row[u'VALUETYPE'], 'category': category})

                    except Exception as e:
                        errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    # NOTE(review): errors still holds the messages of pass 1, so those get
    # the values file header put in front of them as well -- confirm.
    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename.replace('.csv', '.values.csv')))
        errors.append('\n\n\n\n')

    # insert and index the concepts
    for key in lookups.lookup:
        try:
            lookups.lookup[key]['concept'].save()
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (lookups.lookup[key]['rownum'], str(e), traceback.format_exc()))

        # indexing happens even when the save above failed, and is not guarded by a try block
        lookups.lookup[key]['concept'].index(scheme=top_concept)

    # insert the concept relations
    for relation in lookups.concept_relationships:
        # NOTE(review): the values are interpolated straight into the SQL text;
        # consider passing them as query parameters to cursor.execute instead.
        sql = """ INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype) VALUES ('%s', '%s', '%s'); """%(relation['source'], relation['target'], relation['type'])
        #print sql
        try:
            cursor.execute(sql)
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    #print 'Time to parse = %s' % ("{0:.2f}".format(time() - start))
    return errors
def test_prefLabel(self):
    """
    Check which label get_preflabel hands back for a requested language,
    including the fallbacks this test expects when there is no exact match

    """

    def make_label(kind, text, lang):
        # every value in this test belongs to the 'label' category
        return ConceptValue({
            'type': kind,
            'category': 'label',
            'value': text,
            'language': lang
        })

    concept = Concept()
    concept.nodetype = 'Concept'

    # exact matches are available for every requested language
    concept.values = [
        make_label('prefLabel', 'test pref label en-US', 'en-US'),
        make_label('prefLabel', 'test pref label en', 'en'),
        make_label('prefLabel', 'test pref label es-SP', 'es-SP'),
        make_label('altLabel', 'test alt label en-US', 'en-US'),
    ]
    self.assertEqual(
        concept.get_preflabel(lang='en-US').value, 'test pref label en-US')
    self.assertEqual(
        concept.get_preflabel(lang='en').value, 'test pref label en')
    # without an explicit language the configured LANGUAGE_CODE is expected
    expected_default = 'test pref label %s' % (test_settings.LANGUAGE_CODE)
    self.assertEqual(concept.get_preflabel().value, expected_default)

    # should pick the base language if it can't find the more specific version
    concept.values = [
        make_label('prefLabel', 'test pref label en', 'en'),
        make_label('prefLabel', 'test pref label es', 'es-SP'),
        make_label('altLabel', 'test alt label en-US', 'en-US'),
    ]
    self.assertEqual(
        concept.get_preflabel(lang='en-US').value, 'test pref label en')

    # no english prefLabel at all: the altLabel in the exact language is expected
    concept.values = [
        make_label('prefLabel', 'test pref label es', 'es-SP'),
        make_label('altLabel', 'test alt label en-US', 'en-US'),
    ]
    self.assertEqual(
        concept.get_preflabel(lang='en-US').value, 'test alt label en-US')

    # ... and the altLabel in the base language when that is all there is
    concept.values = [
        make_label('prefLabel', 'test pref label es', 'es-SP'),
        make_label('altLabel', 'test alt label en', 'en'),
    ]
    self.assertEqual(
        concept.get_preflabel(lang='en-US').value, 'test alt label en')

    # asking for the base language: the more specific prefLabel is expected
    concept.values = [
        make_label('prefLabel', 'test pref label en-US', 'en-US'),
        make_label('prefLabel', 'test pref label es', 'es-SP'),
        make_label('altLabel', 'test alt label en-US', 'en-US'),
    ]
    self.assertEqual(
        concept.get_preflabel(lang='en').value, 'test pref label en-US')
def test_prefLabel(self):
    """
    Verify the label returned by get_preflabel for a number of label sets
    and requested languages

    """

    def label_value(label_type, value, language):
        # shorthand for a ConceptValue in the 'label' category
        return ConceptValue({
            'type': label_type,
            'category': 'label',
            'value': value,
            'language': language
        })

    concept = Concept()
    concept.nodetype = 'Concept'

    def expect_preflabel(expected, **lookup):
        # lookup is passed on untouched, so an empty lookup means "no lang given"
        self.assertEqual(concept.get_preflabel(**lookup).value, expected)

    # 1. every requested language has its own prefLabel
    concept.values = [
        label_value('prefLabel', 'test pref label en-US', 'en-US'),
        label_value('prefLabel', 'test pref label en', 'en'),
        label_value('prefLabel', 'test pref label es-SP', 'es-SP'),
        label_value('altLabel', 'test alt label en-US', 'en-US'),
    ]
    expect_preflabel('test pref label en-US', lang='en-US')
    expect_preflabel('test pref label en', lang='en')
    expect_preflabel('test pref label %s' % (test_settings.LANGUAGE_CODE))

    # 2. should pick the base language if it can't find the more specific version
    concept.values = [
        label_value('prefLabel', 'test pref label en', 'en'),
        label_value('prefLabel', 'test pref label es', 'es-SP'),
        label_value('altLabel', 'test alt label en-US', 'en-US'),
    ]
    expect_preflabel('test pref label en', lang='en-US')

    # 3. only an altLabel exists in the requested language
    concept.values = [
        label_value('prefLabel', 'test pref label es', 'es-SP'),
        label_value('altLabel', 'test alt label en-US', 'en-US'),
    ]
    expect_preflabel('test alt label en-US', lang='en-US')

    # 4. only an altLabel exists, and only in the base language
    concept.values = [
        label_value('prefLabel', 'test pref label es', 'es-SP'),
        label_value('altLabel', 'test alt label en', 'en'),
    ]
    expect_preflabel('test alt label en', lang='en-US')

    # 5. base language requested, only the more specific prefLabel exists
    concept.values = [
        label_value('prefLabel', 'test pref label en-US', 'en-US'),
        label_value('prefLabel', 'test pref label es', 'es-SP'),
        label_value('altLabel', 'test alt label en-US', 'en-US'),
    ]
    expect_preflabel('test pref label en-US', lang='en')