def make_place(self, data: dict, base_uri=None):
    '''
    Given a dictionary representing data about a place, construct a model.Place object,
    assign it as the crom data in the dictionary, and return the dictionary.

    The dictionary keys used to construct the place object are:

    - name
    - type (one of: 'City', 'State', 'Province', or 'Country')
    - part_of (a recursive place dictionary)
    - uri (optional; used as the place identifier when present)

    When `data` has no 'uri', one is derived (and written back to `data['uri']`)
    from the project URI scheme if the label is a known unique location, or
    otherwise from `base_uri` plus the URL-quoted label.

    Returns None when `data` is None. A place without a name is still modeled,
    but a warning is emitted and no URI is derived from `base_uri` for it.
    '''
    # Bail out before touching any instance state or vocabulary tables.
    if data is None:
        return None

    unique_locations = self.unique_locations
    TYPES = {
        'city': vocab.instances['city'],
        'province': vocab.instances['province'],
        'state': vocab.instances['province'],
        'country': vocab.instances['nation'],
    }

    # Tolerate an explicit `type: None` as well as a missing key.
    type_name = (data.get('type') or 'place').lower()
    name = data.get('name')
    label = name
    parent_data = data.get('part_of')
    place_type = TYPES.get(type_name)

    parent = None
    if parent_data:
        parent_data = self.make_place(parent_data, base_uri=base_uri)
        parent = get_crom_object(parent_data)
        # The label of a nested place is qualified by the label of its parent.
        if label and hasattr(parent, '_label'):
            label = f'{label}, {parent._label}'

    placeargs = {}
    if label:
        placeargs['label'] = label

    if data.get('uri'):
        placeargs['ident'] = data['uri']
    elif label in unique_locations:
        data['uri'] = self.make_proj_uri('PLACE', label)
        placeargs['ident'] = data['uri']
    elif base_uri and label:
        # quote() raises TypeError on None, so only derive a URI when there
        # is a label to derive it from.
        data['uri'] = base_uri + urllib.parse.quote(label)
        placeargs['ident'] = data['uri']

    p = model.Place(**placeargs)
    if place_type:
        p.classified_as = place_type
    if name:
        p.identified_by = model.Name(ident='', content=name)
    else:
        warnings.warn(f'Place with missing name on {p.id}')
    if parent:
        p.part_of = parent
        data['part_of'] = parent_data
    return add_crom_data(data=data, what=p)
def nation(self, name, ident):
    '''
    Return a place object for the country called `name`, classified as a nation
    and identified by a Name carrying `name` as its content.

    NOTE(review): `ident` is accepted for interface compatibility but is not
    used for the returned object; the identifier is built from a fixed tag URI
    prefix (which still contains a 'REPLACE-WITH-UUID' placeholder) plus `name`.
    Confirm with callers whether `ident` was meant to be the place identifier.
    '''
    place = vocab.Place(
        ident='tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-' + name,
        label=name)
    place.classified_as = vocab.instances['nation']
    place.identified_by = model.Name(ident='', content=name)
    return place
def set_properties(self, data, thing):
    '''
    Populate the crom object `thing` from the values in the `data` dictionary,
    after the superclass has applied its own properties.

    Keys read from `data`:

    - ulan: numeric ULAN id, recorded as an exact match
    - name: either a string, or a list/tuple of a string followed by extra
      properties that are applied to the resulting Name
    - exact_match: iterable of values assigned to `thing.exact_match`
    - sojourns: iterable of dictionaries describing activities carried out
    - places: iterable of model.Place objects, place dictionaries carrying a
      crom object, or plain labels; each becomes a residence
    '''
    super().set_properties(data, thing)

    # A missing or non-numeric ULAN value is skipped silently.
    with suppress(ValueError, TypeError):
        ulan_id = int(data.get('ulan'))
        if ulan_id:
            thing.exact_match = model.BaseResource(ident=f'http://vocab.getty.edu/ulan/{ulan_id}')

    if 'name' in data:
        title_type = model.Type(ident='http://vocab.getty.edu/aat/300417193', label='Title')
        name_value = data['name']
        if name_value and isinstance(name_value, str):
            set_la_name(thing, name_value, title_type, set_label=True)
        elif name_value and isinstance(name_value, (list, tuple)):
            # First element is the name content; the rest are extra properties.
            content, *extra_properties = name_value
            name_object = model.Name(ident='', content=content)
            name_object.classified_as = title_type
            self.set_lo_properties(name_object, *extra_properties)
            thing.identified_by = name_object

    for match in data.get('exact_match', []):
        thing.exact_match = match

    for sojourn in data.get('sojourns', []):
        sojourn_label = sojourn.get('label', 'Sojourn activity')
        activity_class = sojourn.get('type', model.Activity)
        activity = activity_class(ident='', label=sojourn_label)
        timespan = get_crom_object(sojourn.get('timespan'))
        location = get_crom_object(sojourn.get('place'))
        activity.timespan = timespan
        activity.took_place_at = location
        thing.carried_out = activity
        self.set_referred_to_by(sojourn, activity)

    # Locations are names of residence places (P74 -> E53)
    # XXX FIXME: Places are their own model
    # (Earlier, disabled code additionally attached a Name and the citing
    # sources to each residence place.)
    for entry in data.get('places', []):
        if isinstance(entry, model.Place):
            residence = entry
        elif isinstance(entry, dict):
            residence = get_crom_object(entry)
        else:
            residence = model.Place(ident='', label=entry)
        thing.residence = residence
def model_publisher_group(self, record, data, seq):
    '''
    Build a Publishing activity for the publisher described by `data` and
    append it to `record['_publishings']` (created if absent).

    The activity URI is the record URI suffixed with `-pub-{seq}`. When `data`
    carries a `gaia_corp_id`, the activity is carried out by the matching
    Group; when it carries a `gaia_geog_id`, it took place at the matching
    Place.
    '''
    publishings = record.setdefault('_publishings', [])
    series_label = record['label']
    corp_id = data.get('gaia_corp_id')
    geog_id = data.get('gaia_geog_id')

    activity_uri = record['uri'] + f'-pub-{seq}'
    if corp_id:
        publisher_suffix = f' by CB{corp_id}'
    else:
        publisher_suffix = f' by publisher #{seq}'
    activity = vocab.Publishing(
        ident=activity_uri,
        label=f'Publishing of {series_label}' + publisher_suffix)

    if corp_id:
        activity.carried_out_by = model.Group(ident=self.helper.corporate_body_uri(corp_id))
    if geog_id:
        activity.took_place_at = model.Place(ident=self.helper.place_uri(geog_id))

    publishings.append(activity)
def make_place(self, data: dict, base_uri=None):
    '''
    Given a dictionary representing data about a place, construct a model.Place object,
    assign it as the crom data in the dictionary, and return the dictionary.

    The dictionary keys used to construct the place object are:

    - name
    - names (optional list of additional names)
    - type (one of: 'City', 'County', 'State', 'Province', 'Country', or 'Sovereign')
    - part_of (a recursive place dictionary)
    - uri (optional; used as the place identifier when present)

    If the name matches a known canonical location name (case-insensitively),
    the normal recursive handling of part_of data is bypassed, using the
    canonical name as both the name and the label of the place.

    If a static Place instance is registered under the name or the label, that
    instance is used as the crom data, and its part_of chain is re-threaded
    into the data dictionary.

    Returns None when `data` is None. A place without a name is still modeled,
    but a warning is emitted and no URI is derived from `base_uri` for it.
    '''
    # Bail out before touching any instance state or vocabulary tables.
    if data is None:
        return None

    canonical_location_names = self.canonical_location_names
    TYPES = {
        'city': vocab.instances['city'],
        'county': vocab.instances['county'],
        'province': vocab.instances['province'],
        'state': vocab.instances['province'],
        'country': vocab.instances['nation'],
        'sovereign': vocab.instances['sovereign'],
    }

    # Tolerate explicit None values as well as missing keys.
    type_name = (data.get('type') or 'place').lower()
    name = data.get('name')
    si = self.static_instances
    names = data.get('names') or []
    label = name
    parent_data = data.get('part_of')
    place_type = TYPES.get(type_name)

    parent = None
    # A missing name must not crash here: it is reported with a warning below.
    if isinstance(name, str) and name.casefold() in canonical_location_names:
        name = canonical_location_names.get(name.casefold(), name)
        label = name
    elif parent_data:
        parent_data = self.make_place(parent_data, base_uri=base_uri)
        parent = get_crom_object(parent_data)
        # The label of a nested place is qualified by the label of its parent.
        if label and hasattr(parent, '_label'):
            label = f'{label}, {parent._label}'

    if si:
        p = None
        for key in (name, label):
            if key is not None:
                p = si.get_instance('Place', key)
                if p:
                    break

        if p:
            # this is a static instance. we need to re-thread the part_of relationship
            # in the data dictionary, because the serialization depends on the dictionary
            # data, not the properties of the modeled object
            add_crom_data(data=data, what=p)
            queue = [data]
            while queue:
                place_data = queue.pop(0)
                place = get_crom_object(place_data)
                static_parents = getattr(place, 'part_of', []) or []
                if static_parents:
                    for static_parent in static_parents:
                        if static_parent:
                            if 'part_of' not in place_data:
                                linked = add_crom_data(data={}, what=static_parent)
                                place_data['part_of'] = linked
                            else:
                                linked = add_crom_data(data=place_data['part_of'], what=static_parent)
                            queue.append(linked)
                elif place_data.get('part_of'):
                    # The static instance has no parents of its own, so model
                    # the ones described by the data. An empty/None part_of
                    # entry is skipped rather than enqueued.
                    linked = self.make_place(place_data['part_of'], base_uri=base_uri)
                    if linked:
                        queue.append(linked)
            return data

    placeargs = {}
    if label:
        placeargs['label'] = label

    if data.get('uri'):
        placeargs['ident'] = data['uri']
    elif base_uri and label:
        # quote() raises TypeError on None, so only derive a URI when there
        # is a label to derive it from.
        data['uri'] = base_uri + urllib.parse.quote(label)
        placeargs['ident'] = data['uri']

    p = model.Place(**placeargs)
    if place_type:
        p.classified_as = place_type
    if name:
        p.identified_by = vocab.PrimaryName(ident='', content=name)
    else:
        warnings.warn(f'Place with missing name on {p.id}')
    for alternate_name in names:
        if alternate_name:
            p.identified_by = model.Name(ident='', content=alternate_name)
    if parent:
        p.part_of = parent
        data['part_of'] = parent_data
    return add_crom_data(data=data, what=p)
def model_imprint_group(self, record, data):
    '''
    Model the imprint information in `data` onto the `record` dictionary.

    Nothing is done (and `record` is left untouched) when `data` is empty.
    Otherwise the list-valued keys 'referred_to_by', 'used_for', 'part_of',
    '_activities', '_groups', '_places' and 'identifiers' are ensured on
    `record`, and then populated as follows:

    - edition, series_number, website_address and thesis_degree become
      statements/notes in 'referred_to_by'
    - doi, coden and technical_report_number become entries in 'identifiers'
    - each publisher/distributor becomes a Publishing/Distributing activity in
      'used_for', with its Group and Place also added to '_groups'/'_places'
    - journal_info becomes an issue text in 'part_of'
    '''
    if not data:
        return

    for key in ('referred_to_by', 'used_for', 'part_of', '_activities', '_groups', '_places', 'identifiers'):
        record.setdefault(key, [])

    edition = data.get('edition')
    series_number = data.get('series_number')
    doi = data.get('doi')
    coden = data.get('coden')
    website = data.get('website_address')
    publishers = _as_list(data.get('publisher'))
    distributors = _as_list(data.get('distributor'))
    journal = data.get('journal_info')
    # imprint_group/journal_info/aata_journal_id
    # imprint_group/journal_info/aata_issue_id
    degree = data.get('thesis_degree')
    tr = data.get('technical_report_number')

    if edition:
        record['referred_to_by'].append(vocab.EditionStatement(ident='', content=edition))
    if series_number:
        record['referred_to_by'].append(vocab.Note(ident='', content=series_number)) # TODO: classify this Note
    if doi:
        record['identifiers'].append(vocab.DoiIdentifier(ident='', content=doi))
    if coden:
        record['identifiers'].append(vocab.CodenIdentifier(ident='', content=coden))
    if website:
        record['referred_to_by'].append(vocab.Note(ident='', content=website))

    article_label = record['label']

    def _model_activities(entries, activity_class, location_key, uri_tag, label_prefix):
        # Model one activity per entry, linking its corporate body and place.
        for i, entry in enumerate(entries):
            corp_id = entry.get('gaia_corp_id')
            # The location may be absent or explicitly None.
            geog_id = (entry.get(location_key) or {}).get('gaia_geog_id')
            activity_uri = record['uri'] + f'-{uri_tag}-{i}'
            activity = activity_class(ident=activity_uri, label=f'{label_prefix} of {article_label}')
            if corp_id:
                group = model.Group(ident=self.helper.corporate_body_uri(corp_id))
                activity.carried_out_by = group
                record['_groups'].append(add_crom_data({}, group))
            if geog_id:
                place = model.Place(ident=self.helper.place_uri(geog_id))
                activity.took_place_at = place
                record['_places'].append(add_crom_data({}, place))
            # The activity is attached through 'used_for' only; it is
            # deliberately not added to record['_activities'].
            record['used_for'].append(activity)

    _model_activities(publishers, vocab.Publishing, 'publisher_location', 'pub', 'Publishing')
    _model_activities(distributors, vocab.Distributing, 'distributor_location', 'dist', 'Distribution')

    if journal:
        journal_id = journal.get('aata_journal_id')
        issue_id = journal.get('aata_issue_id')
        issue_uri = self.helper.issue_uri(journal_id, issue_id)
        issue = vocab.IssueText(ident=issue_uri)
        record['part_of'].append(add_crom_data({'uri': issue_uri}, issue))

    if degree:
        record['referred_to_by'].append(vocab.Note(ident='', content=degree))

    if tr:
        record['identifiers'].append(model.Identifier(ident='', content=tr)) # TODO: classify this Identifier
def setup_static_instances(self):
    '''
    Build the objects that the code refers to statically rather than reading
    them from source data (e.g. the Group or Person that an identifier is
    attributed to), so that their records can be serialized even when they do
    not appear in the data.

    Returns a defaultdict(dict) keyed by type name ('LinguisticObject',
    'Group', 'Person', 'Material', 'Place'), each mapping a short key to the
    corresponding object.
    '''
    lugt_ulan = 500321736
    gri_ulan = 500115990
    gci_ulan = 500115991
    knoedler_ulan = 500304270

    helper = self.helper
    GETTY_PSCP_URI = helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Project for the Study of Collecting and Provenance')
    GETTY_GPI_URI = helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Provenance Index')
    GETTY_GRI_URI = helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', 'JPGM')
    GETTY_GCI_URI = helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Conservation Institute')
    LUGT_URI = helper.make_proj_uri('PERSON', 'ULAN', lugt_ulan)
    KNOEDLER_URI = helper.make_shared_uri('ORGANIZATION', 'ULAN', str(knoedler_ulan))
    NEWYORK_URI = helper.make_shared_uri('PLACE', 'USA', 'NY', 'New York')

    def _named(cls, uri, label, ulan=None):
        # Create an object whose primary name equals its label, optionally
        # linked to its ULAN record as an exact match.
        instance = cls(ident=uri, label=label)
        instance.identified_by = vocab.PrimaryName(ident='', content=label)
        if ulan is not None:
            instance.exact_match = model.BaseResource(ident=f'http://vocab.getty.edu/ulan/{ulan}')
        return instance

    gci = _named(model.Group, GETTY_GCI_URI, 'Getty Conservation Institute', gci_ulan)
    gri = _named(model.Group, GETTY_GRI_URI, 'Getty Research Institute', gri_ulan)
    gpi = _named(model.Group, GETTY_GPI_URI, 'Getty Provenance Index')
    pscp = _named(model.Group, GETTY_PSCP_URI, 'Project for the Study of Collecting and Provenance')
    lugt = _named(model.Person, LUGT_URI, 'Frits Lugt', lugt_ulan)
    knoedler = _named(model.Group, KNOEDLER_URI, 'M. Knoedler & Co.', knoedler_ulan)
    newyork = _named(model.Place, NEWYORK_URI, 'New York, NY')

    # One Material per entry of the optional 'materials' service, keyed by AAT id.
    materials = {}
    if 'materials' in self.services:
        for aat, label in self.services['materials'].items():
            materials[aat] = model.Material(ident=f'http://vocab.getty.edu/aat/{aat}', label=label)

    places = self._static_place_instances()
    places['newyork'] = newyork

    # The STAR databases are all attributed to the Getty Provenance Index.
    databases = {
        'db-people': self.static_db_instance('PEOPLE', name='STAR Person Authority Database', creator=gpi),
        'db-knoedler': self.static_db_instance('Knoedler', name='STAR Knoedler Database', creator=gpi),
        'db-sales_events': self.static_db_instance('Sales', 'Descriptions', name='STAR Sales Catalogue Database', creator=gpi),
        'db-sales_catalogs': self.static_db_instance('Sales', 'Catalogue', name='STAR Physical Sales Catalogue Database', creator=gpi),
        'db-sales_contents': self.static_db_instance('Sales', 'Contents', name='STAR Sales Contents Database', creator=gpi),
    }

    instances = defaultdict(dict)
    instances['LinguisticObject'] = databases
    instances['Group'] = {
        'gci': gci,
        'pscp': pscp,
        'gri': gri,
        'gpi': gpi,
        'knoedler': knoedler,
    }
    instances['Person'] = {'lugt': lugt}
    instances['Material'] = materials
    instances['Place'] = places
    return instances