def model_authorship_group(self, record, data): if not data: return record.setdefault('_people', []) record.setdefault('created_by', []) authors = _as_list(data.get('primary_author')) mlap = MakeLinkedArtPerson() mlao = MakeLinkedArtOrganization() ordered_data = [] article_label = record['label'] creation_id = record['uri'] + '-Creation' creation = model.Creation(ident=creation_id, label=f'Creation of {article_label}') for a in authors: gaia_id = a['gaia_authority_id'] gaia_type = a['gaia_authority_type'] name = a['author_name'] roles = _as_list(a['author_role']) order = a['author_order'] ordered_data.append((order, name)) p = { 'label': name, 'name': name, } if gaia_type == 'Person': uri = self.helper.person_uri(gaia_id) p['uri'] = uri mlap(p) elif gaia_type == 'Corp': uri = self.helper.corporate_body_uri(gaia_id) p['uri'] = uri mlao(p) else: raise Exception( f'Unexpected type of authorship record: {gaia_type}') # uri = self.helper.make_proj_uri(gaia_type, 'GAIA', gaia_id) record['_people'].append(p) for role in roles: part = model.Creation(ident='', label=f'{role} Creation sub-event') part.carried_out_by = get_crom_object(p) cl = self.helper.role_type(role) if cl: part.classified_as = cl creation.part = part ordered_authors = [p[1] for p in sorted(ordered_data)] order_string = self.helper.ordered_author_string(ordered_authors) creation.referred_to_by = vocab.Note(ident='', content=order_string) record['created_by'].append(creation)
def __call__(self, data): self.model_concept_group(data, data['concept_group']) for tg in _as_list(data.get('term_group')): self.model_term_group(data, tg) for mg in _as_list(data.get('exact_match_group')): self.model_exact_match_group(data, mg) for wg in _as_list(data.get('warrant_group')): self.model_warrant_group(data, wg) self.model_place(data) return data
def __call__(self, data): jid = data['record_desc_group']['record_id'] data['uri'] = self.helper.series_uri(jid) self.model_record_desc_group(data, data['record_desc_group']) self.model_series_group(data, data.get('series_group')) data.setdefault('label', f'Series ({jid})') for i, pg in enumerate(_as_list(data.get('publisher_group'))): self.model_publisher_group(data, pg, i) for sg in _as_list(data.get('sponsor_group')): self.model_sponsor_group(data, sg) self.model_journal(data) return data
def model_series_group(self, record, data): if not data: return record.setdefault('identifiers', []) record.setdefault('referred_to_by', []) record.setdefault('language', []) title = data.get('title') title_translated = data.get('title_translated') variant_titles = _as_list(data.get('variant_title')) related_titles = _as_list(data.get('related_title')) lang_docs = _as_list(data['lang_doc']) frequency = data.get('frequency') start_year = data.get('start_year') cease_year = data.get('cease_year') issn = data.get('issn') coden = data.get('coden') if title: record['label'] = title record['identifiers'].append( vocab.PrimaryName(ident='', content=title)) if title_translated: record['identifiers'].append( vocab.TranslatedTitle(ident='', content=title)) for vtitle in variant_titles: record['identifiers'].append(vocab.Title(ident='', content=vtitle)) for lang in lang_docs: l = self.helper.language_object_from_code(lang) if l: record['language'].append(l) if frequency: record['referred_to_by'].append( vocab.Note(ident='', content=frequency)) if start_year: record['_publishing_start_year'] = start_year if cease_year: record['_publishing_cease_year'] = cease_year if issn: record['identifiers'].append( vocab.IssnIdentifier(ident='', content=issn)) if coden: record['identifiers'].append( vocab.CodenIdentifier(ident='', content=coden))
def __call__(self, data): self.model_concept_group(data, data['concept_group']) pid = data['concept_group']['gaia_auth_id'] for tg in _as_list(data.get('term_group')): self.model_term_group(data, tg) for rg in _as_list(data.get('gaia_auth_relationship_group')): self.model_relationship_group(data, rg) for mg in _as_list(data.get('exact_match_group')): self.model_exact_match_group(data, mg) for wg in _as_list(data.get('warrant_group')): self.model_warrant_group(data, wg) data.setdefault('label', f'Person ({pid})') self.model_person(data) return data
def model_notes_group(self, record, data): if not data: return record.setdefault('_declared_languages', set()) record.setdefault('language', []) record.setdefault('identifiers', []) record.setdefault('referred_to_by', []) lang_docs = _as_list(data.get('lang_doc')) lang_summaries = _as_list(data.get('lang_summary')) isbns = _as_list(data.get('isbn')) issns = _as_list(data.get('issn')) citation_note = data.get('citation_note') inotes = _as_list(data.get('internal_note')) for lang in lang_summaries: record['_declared_languages'].add(lang) for lang in lang_docs: record['_declared_languages'].add(lang) l = self.helper.language_object_from_code(lang) if l: record['language'].append(l) for isbn in isbns: num = isbn.get('isbn_number') q = isbn.get('isbn_qualifier') if num: i = vocab.IsbnIdentifier(ident='', content=num) if q: i.referred_to_by = vocab.Note(ident='', content=q) record['identifiers'].append(i) for issn in issns: i = vocab.IssnIdentifier(ident='', content=issn) record['identifiers'].append(i) if citation_note: record['referred_to_by'].append( vocab.Citation(ident='', content=citation_note)) for inote in inotes: record['referred_to_by'].append( vocab.Note(ident='', content=inote['note']))
def model_index_group(record, data): record.setdefault('indexing', []) term = data['index_term'] auth_type = data.get('gaia_auth_type') opids = _as_list(data.get('other_persistent_id')) for opid in opids: eid = opid['external_id'] ename = opid['external_name'] if ename in ('AAT', 'ULAN', 'TGN'): v = ename.lower() uri = f'http://vocab.getty.edu/{v}/{eid}' t = model.Type(ident=uri, label=term) record['indexing'].append(t)
def __call__(self, data): pid = data['concept_group']['gaia_auth_id'] data.setdefault('identifiers', []) data['identifiers'].append( self.helper.gci_number_id(pid, id_class=vocab.SystemNumber)) self.model_concept_group(data, data['concept_group']) for tg in _as_list(data.get('term_group')): self.model_term_group(data, tg) self.add_uri(data) self.model_place(data) return data
def __call__(self, data, language_code_map): ''' Given an XML element representing an AATA record, extract information about the "article" (this might be a book, chapter, journal article, etc.) including: * document type * titles and title translations * organizations and their role (e.g. publisher) * creators and thier role (e.g. author, editor) * abstracts * languages This information is returned in a single `dict`. ''' rid = data['record_id_group']['record_id'] data['uri'] = self.helper.make_proj_uri('Article', rid) self.model_title_group(data, data['title_group']) data.setdefault('label', f'Article ({rid})' ) # this should get overridden in model_title_group) self.model_record_desc_group(data, data['record_desc_group']) self.model_record_id_group(data, data['record_id_group']) self.model_authorship_group(data, data.get('authorship_group')) self.model_imprint_group(data, data.get('imprint_group')) self.model_physical_desc_group(data, data.get('physical_desc_group')) self.model_notes_group(data, data.get('notes_group')) self.model_abstract_group(data, data.get('abstract_group')) for cg in _as_list(data.get('classification_group')): self.model_classification_group(data, cg) for ig in _as_list(data.get('index_group')): self.model_index_group(data, ig) self.add_title(data) self.model_article(data) return data
def model_title_group(self, record, data): record.setdefault('identifiers', []) primary = data['primary'] title = primary.get('title') translated = primary.get('title_translated') variants = _as_list(primary.get('title_variant')) if title: record['label'] = title if 'title' in record: raise Exception(f'existing title!') record['title'] = title if translated: record['identifiers'].append( vocab.TranslatedTitle(ident='', content=translated)) for v in variants: record['identifiers'].append(vocab.Title(ident='', content=v))
def add_uri(self, data): cg = data.get('concept_group', {}) pid = cg['gaia_auth_id'] terms = _as_list(data.get('term_group', [])) names = [ t.get('term_name') for t in terms if t.get('term_type') == 'main' ] name = names[0] if names else None place_type = cg.get('place_type') country = data.get('country') state = data.get('state') names = [] if country: names.append(country['label']) if state: names.append(state['label']) names.append(name) data['uri'] = self.helper.place_uri(pid, *names, place_type=place_type)
def model_record_id_group(self, record, data): record.setdefault('identifiers', []) record.setdefault('part_of', []) rid = data['record_id'] aata_ids = _as_list(data.get('aata_id')) cid = data.get('collective_rec_id') record['identifiers'] += [ self.helper.gci_number_id(aid) for aid in aata_ids ] record['identifiers'] += [ self.helper.gci_number_id(rid, id_class=vocab.SystemNumber) ] if cid: uri = self.helper.article_uri(cid) parent = {'uri': uri} make_la_lo = MakeLinkedArtLinguisticObject() make_la_lo(parent) record['part_of'].append(parent)
def model_concept_group(self, record, data): record.setdefault('referred_to_by', []) record.setdefault('identifiers', []) record.setdefault('_places', []) # for extraction/serialization by the pipeline record.setdefault('places', []) # for pipeline.linkedart modeling code gaia_id = data['gaia_auth_id'] snote = data.get('scope_note') inote = data.get('internal_note') snfnote = data.get('source_not_found_note') locations = _as_list(data.get('location', [])) record['uri'] = self.helper.corporate_body_uri(gaia_id) record['identifiers'].append(self.helper.gci_number_id(gaia_id, id_class=vocab.SystemNumber)) if snote: record['referred_to_by'].append(vocab.Note(ident='', content=snote)) if inote: record['referred_to_by'].append(vocab.InternalNote(ident='', content=inote)) if snfnote: record['referred_to_by'].append(vocab.InternalNote(ident='', content=snfnote)) mlap = MakeLinkedArtPlace() for loc in locations: geog_id = loc.get('gaia_geog_id') if geog_id: geog_uri = self.helper.place_uri(geog_id) geog_data = { 'uri': geog_uri, 'identifiers': [], } geog_name = loc.get('location_string') if geog_name: geog_data['label'] = geog_name geog_data['name'] = geog_name mlap(geog_data) record['places'].append(geog_data) record['_places'].append(geog_data)
def model_imprint_group(self, record, data): if not data: return record.setdefault('referred_to_by', []) record.setdefault('used_for', []) record.setdefault('part_of', []) record.setdefault('_activities', []) record.setdefault('_groups', []) record.setdefault('_places', []) record.setdefault('identifiers', []) edition = data.get('edition') series_number = data.get('series_number') doi = data.get('doi') coden = data.get('coden') website = data.get('website_address') publishers = _as_list(data.get('publisher')) distributors = _as_list(data.get('distributor')) journal = data.get('journal_info') # imprint_group/journal_info/aata_journal_id # imprint_group/journal_info/aata_issue_id degree = data.get('thesis_degree') tr = data.get('technical_report_number') if edition: record['referred_to_by'].append( vocab.EditionStatement(ident='', content=edition)) if series_number: record['referred_to_by'].append( vocab.Note(ident='', content=series_number)) # TODO: classify this Note if doi: record['identifiers'].append( vocab.DoiIdentifier(ident='', content=doi)) if coden: record['identifiers'].append( vocab.CodenIdentifier(ident='', content=coden)) if website: record['referred_to_by'].append( vocab.Note(ident='', content=website)) article_label = record['label'] for i, publisher in enumerate(publishers): corp_id = publisher.get('gaia_corp_id') geog_id = publisher.get('publisher_location', {}).get('gaia_geog_id') a_uri = record['uri'] + f'-pub-{i}' a = vocab.Publishing(ident=a_uri, label=f'Publishing of {article_label}') if corp_id: uri = self.helper.corporate_body_uri(corp_id) g = model.Group(ident=uri) a.carried_out_by = g record['_groups'].append(add_crom_data({}, g)) if geog_id: uri = self.helper.place_uri(geog_id) p = model.Place(ident=uri) a.took_place_at = p record['_places'].append(add_crom_data({}, p)) record['used_for'].append(a) # record['_activities'].append(add_crom_data({}, a)) for i, distributor in enumerate(distributors): corp_id = distributor.get('gaia_corp_id') geog_id = distributor.get('distributor_location', {}).get('gaia_geog_id') a_uri = record['uri'] + f'-dist-{i}' a = vocab.Distributing(ident=a_uri, label=f'Distribution of {article_label}') if corp_id: uri = self.helper.corporate_body_uri(corp_id) g = model.Group(ident=uri) a.carried_out_by = g record['_groups'].append(add_crom_data({}, g)) if geog_id: uri = self.helper.place_uri(geog_id) p = model.Place(ident=uri) a.took_place_at = p record['_places'].append(add_crom_data({}, p)) record['used_for'].append(a) # record['_activities'].append(add_crom_data({}, a)) if journal: journal_id = journal.get('aata_journal_id') issue_id = journal.get('aata_issue_id') issue_uri = self.helper.issue_uri(journal_id, issue_id) issue = vocab.IssueText(ident=issue_uri) record['part_of'].append(add_crom_data({'uri': issue_uri}, issue)) if degree: record['referred_to_by'].append( vocab.Note(ident='', content=degree)) if tr: record['identifiers'].append(model.Identifier( ident='', content=tr)) # TODO: classify this Identifier