# NOTE(review): a second, equivalent ``import_concepts`` is defined directly
# after this one in the file; Python binds the later definition.
def import_concepts(reference_data):
    """Create concepts, their values and their relations from ``reference_data``.

    ``reference_data`` is read positionally: item 0 is a dict with a
    ``'concepts'`` list, item 1 a dict with a ``'values'`` list and item 2 a
    dict with a ``'relations'`` list.

    * Every concept entry is saved as a ``Concept`` (id, nodetype, legacyoid).
    * Every value entry is saved as a ``ConceptValue``; a ``DValueType`` row
      is created first for any value type that does not exist yet.
    * A relation is added only when both of its endpoints were among the
      concepts created by this call; other relations are skipped silently.
    """
    concepts = reference_data[0]['concepts']
    values = reference_data[1]['values']
    relations = reference_data[2]['relations']

    # Keep the saved objects by id so relations can be resolved without
    # going back to the database.
    concept_objs = {}
    for concept in concepts:
        concept_obj = Concept()
        concept_obj.id = concept['conceptid']
        concept_obj.nodetype = concept['nodetype']
        concept_obj.legacyoid = concept['legacyoid']
        concept_obj.save()

        concept_objs[concept_obj.id] = concept_obj

    # A set gives constant-time membership tests; the known value types are
    # loaded once and kept in sync as new ones are created below.
    existing_valuetypes = set(o.valuetype for o in models.DValueType.objects.all())
    for value in values:
        if value['valuetype'] not in existing_valuetypes:
            models.DValueType.objects.create(valuetype=value['valuetype'], category='undefined', namespace='arches')
            existing_valuetypes.add(value['valuetype'])

        conceptvalue_obj = ConceptValue()
        conceptvalue_obj.id = value['valueid']
        conceptvalue_obj.conceptid = value['conceptid']
        conceptvalue_obj.type = value['valuetype']
        conceptvalue_obj.value = value['value']
        conceptvalue_obj.language = value['languageid']
        conceptvalue_obj.save()

    for relation in relations:
        if relation['conceptidfrom'] in concept_objs and relation['conceptidto'] in concept_objs:
            conceptfrom = concept_objs[relation['conceptidfrom']]
            conceptto = concept_objs[relation['conceptidto']]
            conceptfrom.add_relation(conceptto, relation['relationtype'])
# NOTE(review): this redefines the ``import_concepts`` that appears directly
# above it in the file; this later definition is the one Python binds.
def import_concepts(reference_data):
    """Import concepts, concept values and concept relations.

    ``reference_data`` is a sequence whose first three items are dicts holding
    the ``'concepts'``, ``'values'`` and ``'relations'`` lists respectively.

    Concepts are saved first and remembered by id. Values are saved next, with
    a ``DValueType`` row created for any value type not seen before. Relations
    are added last, and only between concepts saved by this call; a relation
    with an unknown endpoint is skipped without an error.
    """
    concepts = reference_data[0]['concepts']
    values = reference_data[1]['values']
    relations = reference_data[2]['relations']

    concept_objs = {}
    for concept in concepts:
        concept_obj = Concept()
        concept_obj.id = concept['conceptid']
        concept_obj.nodetype = concept['nodetype']
        concept_obj.legacyoid = concept['legacyoid']
        concept_obj.save()

        concept_objs[concept_obj.id] = concept_obj

    # Use a set rather than a list: the membership test below runs once per
    # value, and a list would make that a linear scan every time.
    existing_valuetypes = set(
        o.valuetype for o in models.DValueType.objects.all()
    )
    for value in values:
        if value['valuetype'] not in existing_valuetypes:
            models.DValueType.objects.create(valuetype=value['valuetype'],
                                             category='undefined',
                                             namespace='arches')
            existing_valuetypes.add(value['valuetype'])

        conceptvalue_obj = ConceptValue()
        conceptvalue_obj.id = value['valueid']
        conceptvalue_obj.conceptid = value['conceptid']
        conceptvalue_obj.type = value['valuetype']
        conceptvalue_obj.value = value['value']
        conceptvalue_obj.language = value['languageid']
        conceptvalue_obj.save()

    for relation in relations:
        if relation['conceptidfrom'] in concept_objs and relation[
                'conceptidto'] in concept_objs:
            conceptfrom = concept_objs[relation['conceptidfrom']]
            conceptto = concept_objs[relation['conceptidto']]
            conceptfrom.add_relation(conceptto, relation['relationtype'])
def _append_error_section(errors, section_errors, source_name):
    """Append one phase's errors to ``errors`` under a header naming their source.

    Nothing is appended when ``section_errors`` is empty, so a phase that ran
    cleanly contributes neither a header nor a trailing separator.
    """
    if section_errors:
        errors.append('ERRORS IN FILE: %s\n' % (source_name))
        errors.extend(section_errors)
        errors.append('\n\n\n\n')


def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    """Load one authority CSV, and its optional ``.values.csv``, as concepts.

    The work happens in three phases:

    1. The authority file is parsed into an in-memory ``Lookups`` registry of
       concepts and the relationships between them.
    2. If a sibling ``<name>.values.csv`` exists, its rows are added as extra
       values on the concepts registered in phase 1.
    3. Every registered concept is saved and indexed, then every collected
       relationship is inserted into ``concepts.relations`` via ``cursor``.

    Problems are collected instead of raised, so one bad row does not stop
    the rest of the file from loading.

    Args:
        cursor: database cursor used for the relation inserts. Assumed to
            accept ``%s``-style query parameters, as Django and psycopg2
            cursors do -- TODO confirm against the caller.
        path_to_authority_files: directory that contains ``filename``.
        filename: name of the authority CSV. For ordinary files it also
            becomes the legacy id of the file's top concept.
        auth_file_to_entity_concept_mapping: dict keyed by authority file
            name; each value is an iterable of dicts that carry an
            ``'ENTITYTYPE_CONCEPTID'`` key.

    Returns:
        A list of strings, empty when nothing went wrong. Each phase that hit
        errors contributes an ``'ERRORS IN FILE: ...'`` header, its error
        messages, and a blank-line separator.
    """
    print(filename.upper())

    value_types = models.ValueTypes.objects.all()
    filepath = os.path.join(path_to_authority_files, filename)
    values_filepath = filepath.replace('.csv', '.values.csv')
    unicodecsv.field_size_limit(sys.maxint)
    errors = []
    lookups = Lookups()

    # create nodes for each authority document file and relate them to the
    # authority document node in the concept schema
    auth_doc_file_name = str(filename)
    display_file_name = string.capwords(auth_doc_file_name.replace('_', ' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value': display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)
    else:
        # The cross-reference relationship types file hangs off a fixed,
        # already existing concept instead of a newly created top concept.
        top_concept = Concept().get(id='00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    # Register the top concept so rows can name it as their parent.
    lookups.add_lookup(concept=top_concept, rownum=0)

    # ---- phase 1: the authority file itself -------------------------------
    file_errors = []
    try:
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID', 'PREFLABEL', 'ALTLABELS', 'PARENTCONCEPTID', 'CONCEPTTYPE', 'PROVIDER'],
                encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            next(rows)  # skip header row
            for row in rows:
                try:
                    # NOTE(review): ``restval`` fills the *values* of absent
                    # columns, so this key test looks like it can never be
                    # true -- confirm the intent before changing it, since a
                    # value-based check would start rejecting short rows.
                    if 'MISSING' in row:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (row['MISSING']))

                    # Validate before touching ``lookups`` so a rejected row
                    # cannot leave behind relationships that point at a
                    # concept which is never registered or saved.
                    concepttype = row[u'CONCEPTTYPE'].upper()
                    if row[u'PARENTCONCEPTID'] == '' or (concepttype != 'INDEX' and concepttype != 'COLLECTOR'):
                        raise Exception('The row has invalid values.')

                    legacyoid = row[u'CONCEPTID']
                    concept = Concept()
                    # Reuse the CSV id when it already is a UUID, otherwise mint one.
                    concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                    concept.nodetype = 'Concept'
                    concept.legacyoid = row[u'CONCEPTID']
                    concept.addvalue({'value': row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                    if row['CONCEPTTYPE'].lower() == 'collector':
                        concept.addvalue({'value': row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                    if row[u'ALTLABELS'] != '':
                        for altlabel in row[u'ALTLABELS'].split(';'):
                            concept.addvalue({'value': altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})

                    parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                    lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                    # don't add a member relationship between a top concept and its children
                    if parent_concept_id != top_concept.id:
                        lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)

                    # add the member relationship from the E55 type (typically) to their top members
                    if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                        for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                            lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                    lookups.add_lookup(concept=concept, rownum=rows.line_num)
                except Exception as e:
                    # Row-level boundary: record the problem and keep going.
                    file_errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))
    except UnicodeDecodeError as e:
        file_errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        file_errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))
    _append_error_section(errors, file_errors, filename)

    # ---- phase 2: the optional values file --------------------------------
    values_errors = []
    try:
        # try and open the values file if it exists
        if exists(values_filepath):
            with open(values_filepath, 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID', 'VALUE', 'VALUETYPE', 'PROVIDER'],
                    encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                next(rows)  # skip header row
                for row in rows:
                    try:
                        # Surplus columns land under the ``restkey`` key; they
                        # usually mean a value contained an unquoted comma.
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s.  Add quotes to values that have commas in them.' % (row['ADDITIONAL']))

                        row_valuetype = row[u'VALUETYPE'].strip()
                        if row_valuetype not in value_types.values_list('valuetype', flat=True):
                            valuetype = models.ValueTypes()
                            valuetype.valuetype = row_valuetype
                            valuetype.category = 'undefined'
                            valuetype.namespace = 'arches'
                            valuetype.save()

                        value_types = models.ValueTypes.objects.all()
                        concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                        category = value_types.get(valuetype=row_valuetype).category
                        concept.addvalue({'value': row[u'VALUE'], 'type': row[u'VALUETYPE'], 'category': category})
                    except Exception as e:
                        values_errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))
    except UnicodeDecodeError as e:
        values_errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        values_errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))
    _append_error_section(errors, values_errors, filename.replace('.csv', '.values.csv'))

    # ---- phase 3: persist concepts, then their relations ------------------
    persist_errors = []

    # insert and index the concepts
    for key in lookups.lookup:
        try:
            lookups.lookup[key]['concept'].save()
        except Exception as e:
            persist_errors.append('ERROR in row %s (%s):\n%s\n' % (lookups.lookup[key]['rownum'], str(e), traceback.format_exc()))

        lookups.lookup[key]['concept'].index(scheme=top_concept)

    # insert the concept relations
    # The values travel as query parameters rather than being spliced into
    # the statement, so a quote in an id cannot break or alter the SQL.
    sql = """
        INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype)
        VALUES (%s, %s, %s);
    """
    for relation in lookups.concept_relationships:
        # '%s' % value keeps the same text coercion the interpolated
        # statement used to apply to each value.
        params = ['%s' % relation[field] for field in ('source', 'target', 'type')]
        try:
            cursor.execute(sql, params)
        except Exception as e:
            persist_errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))
    _append_error_section(errors, persist_errors, filename)

    return errors