def model_person_or_group(self, data: dict, a: dict, attribution_group_types, attribution_group_names, role='artist', seq_no=0, sales_record=None):
    '''
    Model the record `a` either as a person, or — when a group-attribution
    modifier (mapped in `attribution_group_types`) is present — as a group
    formed under the influence of that person.

    Returns `a` (with crom data attached) for the person case, or a new
    group data dict (also appended to data['_organizations']) for the
    group case.
    '''
    # already modeled on an earlier pass; nothing to do
    if get_crom_object(a):
        return a
    mods = a['modifiers']
    artist = self.helper.add_person(a, record=sales_record, relative_id=f'artist-{seq_no+1}', role=role)
    artist_label = a['label']
    person = get_crom_object(a)
    if mods:
        GROUP_TYPES = set(attribution_group_types.values())
        GROUP_MODS = {
            k
            for k, v in attribution_group_types.items() if v in GROUP_TYPES
        }
        if mods.intersects(GROUP_MODS):
            # pick one matching modifier to determine the group class
            mod_name = list(GROUP_MODS & mods)[0]  # TODO: use all matching types?
            clsname = attribution_group_types[mod_name]
            cls = getattr(vocab, clsname)
            group_name = attribution_group_names[clsname]
            group_label = f'{group_name} {artist_label}'
            a['label'] = group_label
            # The group URI is just the person URI with a suffix. In any case
            # where the person is merged, the group should be merged as well.
            # For example, if "RUBENS" is merged, "School of RUBENS" should
            # also be merged.
            group_id = a['uri'] + f'-{clsname}'
            group = cls(ident=group_id, label=group_label)
            group.identified_by = model.Name(ident='', content=group_label)
            # the group is modeled as having been formed under the influence
            # of the person it is named after
            formation = model.Formation(ident='', label=f'Formation of {group_label}')
            formation.influenced_by = person
            group.formed_by = formation
            pi_record_no = data['pi_record_no']
            group_uri_key = ('GROUP', 'PI', pi_record_no, f'{role}Group')
            group_data = {
                'uri': group_id,
                'uri_keys': group_uri_key,
                'modifiers': mods,
                'label': group_label
            }
            add_crom_data(group_data, group)
            data['_organizations'].append(group_data)
            return group_data
    # no group modifier: the record models the person directly
    add_crom_data(a, artist)
    return a
def __call__(self, data: dict, event_properties, date_modifiers):
    '''Add modeling for an auction event based on properties of the supplied `data` dict.'''
    record = get_crom_object(data['_catalog'])
    cno = data['catalog_number']
    sale_type = data.get('non_auction_flag', 'Auction')
    # compute the sale's timespan from the begin/end date components in `data`
    ts, begin, end = timespan_from_bound_components(data, date_modifiers, 'sale_begin_', 'begin', 'sale_end_', 'eoe')
    # previously the label was written into event_properties and then
    # immediately read back with .get(); use the value directly instead
    event_date_label = ts._label
    # share the dates/label with other pipeline branches via the event_properties service
    event_properties['auction_dates'][cno] = (ts, begin, end)
    event_properties['auction_date_label'][cno] = event_date_label
    auction, uid, uri = self.helper.sale_event_for_catalog_number(cno, sale_type, date_label=event_date_label)
    # the catalog record is the documentary source for this event
    auction.referred_to_by = record
    auction.identified_by = model.Name(ident='', content=auction._label)
    data['uid'] = uid
    data['uri'] = uri
    add_crom_data(data=data, what=auction)
    # removed an unused local (`catalog = get_crom_object(data['_catalog'])`)
    data['_record'] = data['_catalog']
    return data
def attach_source_catalog(self, data, acq, people):
    '''
    For every person record in `people` that carries a source-catalog key,
    attach that physical catalog's hand-written notes as a reference on `acq`,
    and record the notes and catalog objects in `data`.
    '''
    notes_by_key = {}
    catalogs_by_key = {}
    for person in people:
        if '_name_source_catalog_key' not in person:
            continue
        key = person['_name_source_catalog_key']
        so_cno, so_owner, so_copy = key
        if key not in notes_by_key:
            # first time we see this catalog copy: model its notes and object
            notes_by_key[key] = self.helper.physical_catalog_notes(so_cno, so_owner, so_copy)
            catalog_uri = self.helper.physical_catalog_uri(so_cno, so_owner, so_copy)
            catalogs_by_key[key] = model.HumanMadeObject(ident=catalog_uri)
        hand_notes = notes_by_key[key]
        catalog = catalogs_by_key[key]
        catalog.carries = hand_notes
        acq.referred_to_by = hand_notes
    data['_phys_catalog_notes'] = [add_crom_data(data={}, what=n) for n in notes_by_key.values()]
    data['_phys_catalogs'] = [add_crom_data(data={}, what=c) for c in catalogs_by_key.values()]
def add_imprint_orgs(data):
    '''
    Given a `dict` representing an "article," extract the "imprint organization"
    records and their role (e.g. publisher, distributor), and add a new
    'organizations' key to the dictionary containing an array of `dict`s
    representing the organizations. Also construct an Activity for each
    organization's role, and associate it with the article and organization
    (article --role--> organization).

    The resulting organization `dict` will contain these keys:

    * `_aata_record_id`: The identifier of the corresponding article
    * `_aata_record_organization_seq`: A integer identifying this organization
      (unique within the scope of the article)
    * `label`: The name of the organization
    * `role`: The role the organization played in the article's creation (e.g. `'Publishing'`)
    * `properties`: A `dict` of additional properties associated with this organization's
      role in the article creation (e.g. `DatesOfPublication`)
    * `names`: A `list` of names this organization may be identified by
    * `identifiers`: A `list` of (identifier, identifier type) pairs
    * `uid`: A unique ID for this organization
    * `uuid`: A unique UUID for this organization used in assigning it a URN
    '''
    lod_object = get_crom_object(data)
    organizations = []
    for o in data.get('_organizations', []):
        org = {k: v for k, v in o.items()}
        org_obj = vocab.Group(ident=org['uri'])
        add_crom_data(data=org, what=org_obj)
        event = model.Activity()  # TODO: change to vocab.Publishing for publishing activities
        lod_object.used_for = event
        event.carried_out_by = org_obj
        properties = o.get('properties')
        role = o.get('role')
        if role is not None:
            activity_names = {
                'distributor': 'Distributing',
                'publisher': 'Publishing',
                # TODO: Need to also handle roles: Organization, Sponsor, University
            }
            if role.lower() in activity_names:
                event_label = activity_names[role.lower()]
                event._label = event_label
            else:
                print('*** No/unknown organization role (%r) found for imprint_group in %s:' % (role, lod_object,))
                # pprint.pprint(o)
        # `properties` may be absent (None); guard before the membership test,
        # which previously raised TypeError on None
        if role == 'Publisher' and properties and 'DatesOfPublication' in properties:
            pubdate = properties['DatesOfPublication']
            span = CleanDateToSpan.string_to_span(pubdate)
            if span is not None:
                event.timespan = span
        organizations.append(org)
    data['organizations'] = organizations
    return data
def __call__(self, data, language_code_map):
    '''
    Given a `dict` representing an "article," extract the abstract records.
    yield a new `dict`s for each such record.

    The resulting abstract `dict` will contain these keys:

    * `_LOD_OBJECT`: A `model.LinguisticObject` object representing the abstract
    * `_aata_record_id`: The identifier of the corresponding article
    * `_aata_record_author_seq`: An integer identifying this abstract
      (unique within the scope of the article)
    * `content`: The text content of the abstract
    * `language`: A model object representing the declared language of the abstract (if any)
    * `author_abstract_flag`: A boolean value indicating whether the article's
      authors also authored the abstract
    * `identifiers`: A `list` of (identifier, identifier type) pairs
    * `_authors`: The authorship information from the input article `dict`
    * `uid`: A unique ID for this abstract
    * `parent`: The model object representing the corresponding article
    * `parent_data`: The `dict` representing the corresponding article
    '''
    lod_object = get_crom_object(data)
    for a in data.get('_abstracts', []):
        # copy everything except 'language', which is replaced below with a model object
        abstract_dict = {k: v for k, v in a.items() if k not in ('language', )}
        abstract_uri = self.helper.make_proj_uri('Abstract', data['_aata_record_id'], a['_aata_record_abstract_seq'])
        content = a.get('content')
        abstract = vocab.Abstract(ident=abstract_uri, content=content)
        abstract.refers_to = lod_object
        langcode = a.get('language')
        if langcode is not None:
            language = self.helper.language_object_from_code(langcode, language_code_map)
            if language is not None:
                abstract.language = language
                abstract_dict['language'] = language
        if '_authors' in data:
            abstract_dict['_authors'] = data['_authors']
        # create a uid based on the AATA record id, the sequence number of the abstract
        # in that record, and which author we're handling right now
        abstract_dict.update({
            'parent_data': data,
            'uri': abstract_uri,
        })
        add_crom_data(data=abstract_dict, what=abstract)
        yield abstract_dict
def add_object_type(data, vocab_type_map):
    '''
    Add appropriate type information for an object based on its 'object_type' name.

    A single mapped name yields the mapped vocab class; a semicolon-separated
    list of names that are all mapped yields a multi-type object; anything else
    falls back to a plain HumanMadeObject with a warning.
    '''
    typestring = data.get('object_type', '')
    if typestring in vocab_type_map:
        # membership was just checked, so index directly instead of .get(..., None)
        otype = getattr(vocab, vocab_type_map[typestring])
        add_crom_data(data=data, what=otype(ident=data['uri']))
    elif ';' in typestring:
        parts = [s.strip() for s in typestring.split(';')]
        # generator expression: no need to materialize a list for all()
        if all(s in vocab_type_map for s in parts):
            types = [getattr(vocab, vocab_type_map[s]) for s in parts]
            obj = vocab.make_multitype_obj(*types, ident=data['uri'])
            add_crom_data(data=data, what=obj)
        else:
            warnings.warn(f'*** Not all object types matched for {typestring!r}')
            add_crom_data(data=data, what=model.HumanMadeObject(ident=data['uri']))
    else:
        warnings.warn(f'*** No object type for {typestring!r}')
        add_crom_data(data=data, what=model.HumanMadeObject(ident=data['uri']))
    # if the parent lot has an object set, record this object's membership in it
    parent = data['parent_data']
    coll_data = parent.get('_lot_object_set')
    if coll_data:
        coll = get_crom_object(coll_data)
        if coll:
            data['member_of'] = [coll]
    return data
def __call__(self, data: dict):
    '''Add modeling for an auction event based on properties of the supplied `data` dict.'''
    cno = data['catalog_number']
    sale_type = data.get('non_auction_flag', 'Auction')
    auction, uid, uri = self.helper.sale_event_for_catalog_number(cno, sale_type)
    auction.identified_by = model.Name(ident='', content=auction._label)
    data['uid'] = uid
    data['uri'] = uri
    add_crom_data(data=data, what=auction)
    # removed an unused local (`catalog = get_crom_object(data['_catalog'])`);
    # the catalog record remains the documentary source for this event
    data['_record'] = data['_catalog']
    return data
def __call__(self, data: dict, non_auctions):
    '''Add modeling for auction catalogs as linguistic objects'''
    cno = data['auction_of_lot']['catalog_number']
    rec_num = data['pi_record_no']
    record_uri = self.helper.make_proj_uri('CATALOG', cno, 'RECORD', rec_num)
    record = vocab.ParagraphText(ident=record_uri, label=f'Sale recorded in catalog (record number {rec_num})')
    data['_sale_record'] = add_crom_data({'uri': record_uri}, record)
    page_id = data.get('pg')
    pdf_page_id = data.get('ppg')
    # without a page number there is no page object to model
    if not page_id:
        yield data
        return
    # sale type may come from the shared `non_auctions` service, or from this
    # record; whichever is found is written back into the service
    sale_type = non_auctions.get(cno, data.get('non_auction_flag'))
    if sale_type:
        non_auctions[cno] = sale_type
    sale_type = sale_type or 'Auction'
    catalog = self.helper.catalog_text(cno, sale_type)
    cdata = add_crom_data(data={'uri': catalog.id}, what=catalog)
    idents = [
        vocab.PageNumber(ident='', content=page_id),
    ]
    if pdf_page_id:
        # the PDF page number acts as both a page number and an ordering number
        idents.append(vocab.make_multitype_obj(vocab.PageNumber, vocab.OrderNumber, ident='', content=pdf_page_id, label=f'Page Order'))
    # model the catalog page that this sale record appears on
    data['_text_page'] = {
        'uri': self.helper.make_proj_uri('CATALOG', cno, 'Page', page_id),
        'object_type': vocab.PageTextForm,
        'label': f'Sale Catalog {cno}, Page {page_id}',
        'identifiers': idents,
        'referred_to_by': [],
        'part_of': [cdata],
        'part': [],
    }
    mlo = MakeLinkedArtLinguisticObject()
    mlo(data['_text_page'])
    yield data
def _populate_object_visual_item(self, data: dict, subject_genre):
    '''
    Create a VisualItem for the object in `data`, attach genre/subject
    classifications parsed from the record's 'genre' and 'subject' fields,
    and store it as data['_visual_item'].
    '''
    hmo = get_crom_object(data)
    title = data.get('title')
    title = truncate_with_ellipsis(title, 100) or title
    # The visual item URI is just the object URI with a suffix. When URIs are
    # reconciled during prev/post sale rewriting, this will allow us to also reconcile
    # the URIs for the visual items (of which there should only be one per object)
    vi_uri = hmo.id + '-VisItem'
    vi = model.VisualItem(ident=vi_uri)
    vidata = {'uri': vi_uri}
    if title:
        vidata['label'] = f'Visual work of “{title}”'
        sales_record = get_crom_object(data['_record'])
        vidata['names'] = [(title, {'referred_to_by': [sales_record]})]
    for key in ('genre', 'subject'):
        if key in data:
            values = [v.strip() for v in data[key].split(';')]
            for value in values:
                for prop, mapping in subject_genre.items():
                    if value in mapping:
                        aat_url = mapping[value]
                        # renamed from `type`, which shadowed the builtin
                        aat_type = model.Type(ident=aat_url, label=value)
                        setattr(vi, prop, aat_type)
    data['_visual_item'] = add_crom_data(data=vidata, what=vi)
    hmo.shows = vi
def add_sellers(self, data: dict, sale_type, transaction, sellers, rel, source=None):
    '''
    Model the provenance entries by which each seller in `sellers` previously
    acquired the object in `data`, appending them to data['_prov_entries'].
    Returns the list of new provenance entry dicts.
    '''
    hmo = get_crom_object(data)
    parent = data['parent_data']
    auction_data = parent['auction_of_lot']
    lot_object_key = object_key(auction_data)
    cno, lno, date = lot_object_key
    lot = get_crom_object(parent['_event_causing_prov_entry'])
    # timespan of the event causing this provenance entry, if it has one
    ts = getattr(lot, 'timespan', None)
    prev_procurements = []
    tx_label_args = tuple([self.helper, sale_type, 'Sold', rel] + list(lot_object_key))
    for i, seller_data in enumerate(sellers):
        seller = get_crom_object(seller_data)
        # NOTE(review): this URI shares a prefix with the object URI; a sibling
        # implementation in this file deliberately avoids that (via
        # helper.prepend_uri_key) so the entries are not merged together during
        # prev/post sale URI reconciliation — confirm whether this one should too
        tx_uri = hmo.id + f'-seller-{i}-Prov'
        tx, acq = self.related_procurement(hmo, tx_label_args, current_ts=ts, buyer=seller, previous=True, ident=tx_uri, make_label=prov_entry_label)
        self.attach_source_catalog(data, acq, [seller_data])
        if source:
            tx.referred_to_by = source
        prev_procurements.append(add_crom_data(data={}, what=tx))
    data['_prov_entries'] += prev_procurements
    return prev_procurements
def __call__(self, data: dict, non_auctions):
    '''Add modeling for the entry describing a physical auction catalog in the PSCP dataset.'''
    cno = data['catalog_number']
    owner = data['owner_code']
    copy = data['copy_number']
    rec_num = data['star_record_no']
    # sale type may come from the shared `non_auctions` service, or from this record
    sale_type = non_auctions.get(cno, data.get('non_auction_flag', 'Auction'))
    # drop empty key components so the URI only contains meaningful parts
    keys = [v for v in [cno, owner, copy] if v]
    record_uri = self.helper.make_proj_uri('ENTRY', 'PHYS-CAT', *keys)
    content = data['star_csv_data']
    catalog_label = self.helper.physical_catalog_label(cno, sale_type, owner, copy)
    row_name = f'STAR Entry for Physical {catalog_label}'
    row = vocab.EntryTextForm(ident=record_uri, content=content, label=row_name)
    row.part_of = self.helper.static_instances.get_instance('LinguisticObject', 'db-sales_catalogs')
    # the entry's creation is attributed to the static 'gpi' Group instance
    creation = model.Creation(ident='')
    creation.carried_out_by = self.helper.static_instances.get_instance('Group', 'gpi')
    row.created_by = creation
    row.identified_by = self.helper.gpi_number_id(rec_num, vocab.StarNumber)
    row.identified_by = vocab.PrimaryName(ident='', content=row_name)
    data['_catalog_record'] = add_crom_data({'uri': record_uri}, row)
    yield data
def handle_places(self, data):
    '''
    Model the entries of data['locations']: each named location becomes a place
    appended to data['places'], each address becomes a contact point, and any
    accompanying notes/dates become references on the record.
    '''
    base_uri = self.helper.make_proj_uri('PLACE', '')
    for loc in data.get('locations', []):
        place_name = loc.get('location')
        if place_name:
            parsed = parse_location_name(place_name, uri_base=self.helper.proj_prefix)
            data['places'].append(self.helper.make_place(parsed, base_uri=base_uri))
            loc_note = loc.get('location_note')
            if loc_note:
                data['referred_to_by'].append(vocab.Note(ident='', content=loc_note))
            loc_date = loc.get('location_date')
            if loc_date:
                data['referred_to_by'].append(vocab.BibliographyStatement(ident='', content=f'Residence in {place_name} ({loc_date})'))
        address = loc.get('address')
        if address:
            identifier = model.Identifier(ident='', content=address)
            data['contact_point'].append(add_crom_data(data={}, what=identifier))
            addr_note = loc.get('address_note')
            if addr_note:
                data['referred_to_by'].append(vocab.Note(ident='', content=addr_note))
            addr_date = loc.get('address_date')
            if addr_date:
                data['referred_to_by'].append(vocab.BibliographyStatement(ident='', content=f'Address at {place_name} ({addr_date})'))
def __call__(self, data: dict, non_auctions):
    '''Add modeling for physical copies of an auction catalog'''
    catalog_text = get_crom_object(data['_catalog'])
    cno = data['catalog_number']
    owner_code = data['owner_code']
    copy_number = data['copy_number']
    sale_type = non_auctions.get(cno, 'Auction')
    # model the physical copy identified by catalog number, owner and copy number
    phys_catalog = self.helper.physical_catalog(cno, sale_type, owner_code, copy_number)
    data['uri'] = phys_catalog.id
    annotation = data.get('annotation_info')
    if annotation:
        phys_catalog.referred_to_by = vocab.Note(ident='', content=annotation)
    # the physical copy carries the catalog's text
    phys_catalog.carries = catalog_text
    add_crom_data(data=data, what=phys_catalog)
    return data
def make_place(self, data: dict, base_uri=None):
    '''
    Given a dictionary representing data about a place, construct a model.Place object,
    assign it as the crom data in the dictionary, and return the dictionary.

    The dictionary keys used to construct the place object are:

    - name
    - type (one of: 'City', 'State', 'Province', or 'Country')
    - part_of (a recursive place dictionary)
    '''
    if data is None:
        return None
    unique_locations = self.unique_locations
    TYPES = {
        'city': vocab.instances['city'],
        'province': vocab.instances['province'],
        'state': vocab.instances['province'],
        'country': vocab.instances['nation'],
    }
    type_name = data.get('type', 'place').lower()
    name = data.get('name')
    label = name
    parent_data = data.get('part_of')
    place_type = TYPES.get(type_name)
    parent = None
    if parent_data:
        # model the enclosing place first so its label can be appended to ours
        parent_data = self.make_place(parent_data, base_uri=base_uri)
        parent = get_crom_object(parent_data)
        if label:
            label = f'{label}, {parent._label}'
    placeargs = {}
    if label:
        placeargs['label'] = label
    if data.get('uri'):
        placeargs['ident'] = data['uri']
    elif label in unique_locations:
        data['uri'] = self.make_proj_uri('PLACE', label)
        placeargs['ident'] = data['uri']
    elif base_uri and label:
        # only derive a URI from the label when there is one; previously a place
        # with no name here caused urllib.parse.quote(None) to raise TypeError
        data['uri'] = base_uri + urllib.parse.quote(label)
        placeargs['ident'] = data['uri']
    p = model.Place(**placeargs)
    if place_type:
        p.classified_as = place_type
    if name:
        p.identified_by = model.Name(ident='', content=name)
    else:
        warnings.warn(f'Place with missing name on {p.id}')
    if parent:
        p.part_of = parent
        data['part_of'] = parent_data
    return add_crom_data(data=data, what=p)
def populate_destruction_events(self, data: dict, note, *, type_map, location=None):
    '''
    Parse a free-text destruction `note` (e.g. "Destroyed by fire in 1945") and,
    if it matches, model a Destruction event for the object in `data`, with an
    optional causing Event (method), timespan (year), and place (`location`).
    '''
    destruction_types_map = type_map
    hmo = get_crom_object(data)
    title = data.get('title')
    short_title = truncate_with_ellipsis(title, 100) or title
    # group 1: the cause word after "by"/"during"; group 2: a four-digit year
    r = re.compile(r'[Dd]estroyed(?: (?:by|during) (\w+))?(?: in (\d{4})[.]?)?')
    m = r.search(note)
    if m:
        method = m.group(1)
        year = m.group(2)
        # The destruction URI is just the object URI with a suffix. When URIs are
        # reconciled during prev/post sale rewriting, this will allow us to also reconcile
        # the URIs for the destructions (of which there should only be one per object)
        dest_uri = hmo.id + '-Destruction'
        d = model.Destruction(ident=dest_uri, label=f'Destruction of “{short_title}”')
        d.referred_to_by = vocab.Note(ident='', content=note)
        if year is not None:
            begin, end = date_cleaner(year)
            ts = timespan_from_outer_bounds(begin, end)
            ts.identified_by = model.Name(ident='', content=year)
            d.timespan = ts
        if method:
            # best-effort: silently skip methods with no mapped destruction type
            with suppress(KeyError, AttributeError):
                type_name = destruction_types_map[method.lower()]
                otype = vocab.instances[type_name]
                event = model.Event(label=f'{method.capitalize()} event causing the destruction of “{short_title}”')
                event.classified_as = otype
                d.caused_by = event
                data['_events'].append(add_crom_data(data={}, what=event))
        if location:
            current = parse_location_name(location, uri_base=self.helper.uid_tag_prefix)
            # The place URI used for destruction events is based on the object URI with
            # a suffix. When URIs are reconciled during prev/post sale rewriting, this
            # will allow us to also reconcile the URIs for the places of destruction
            # (of which there should only be one hierarchy per object)
            base_uri = hmo.id + '-Destruction-Place,'
            place_data = self.helper.make_place(current, base_uri=base_uri)
            place = get_crom_object(place_data)
            if place:
                data['_locations'].append(place_data)
                d.took_place_at = place
        hmo.destroyed_by = d
def __call__(self, data: dict, location_codes, unique_catalogs):
    '''Add information about the ownership of a physical copy of an auction catalog'''
    # Add the URI of this physical catalog to `unique_catalogs`. This data will be used
    # later to figure out which catalogs can be uniquely identified by a catalog number
    # and owner code (e.g. for owners who do not have multiple copies of a catalog).
    cno = data['catalog_number']
    owner_code = data['owner_code']
    copy_number = data.get('copy_number', '')
    owner_name = None
    entry_record = get_crom_object(data.get('_catalog'))
    # unknown owner codes simply leave owner_name as None
    with suppress(KeyError):
        owner_name = location_codes[owner_code]
    owner_uri = self.helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', owner_code)
    data['_owner'] = {
        'label': owner_name,
        'uri': owner_uri,
        'referred_to_by': [entry_record],
        'identifiers': [
            model.Name(ident='', content=owner_name),
            model.Identifier(ident='', content=str(owner_code))
        ],
    }
    owner = model.Group(ident=owner_uri)
    owner.referred_to_by = entry_record
    # previously this call was made twice with identical arguments; once suffices
    add_crom_data(data['_owner'], owner)
    if not owner_code:
        warnings.warn(f'Setting empty identifier on {owner.id}')
    catalog = get_crom_object(data)
    catalog.current_owner = owner
    # None here because we want a key that will stand in for all the copies
    # belonging to a single owner
    owner_uri = self.helper.physical_catalog_uri(cno, owner_code, None)
    copy_uri = self.helper.physical_catalog_uri(cno, owner_code, copy_number)
    unique_catalogs[owner_uri].add(copy_uri)
    return data
def handle_places(self, data):
    '''
    Split the semicolon-separated 'location' and 'address' fields of `data`,
    modeling each distinct location as a place (appended to data['places'])
    and each distinct address as a contact point.
    '''
    base_uri = self.helper.make_proj_uri('PLACE', '')
    raw_locations = data.get('location', '').split(';')
    for name in {part.strip() for part in raw_locations} - {''}:
        parsed = parse_location_name(name, uri_base=self.helper.proj_prefix)
        data['places'].append(self.helper.make_place(parsed, base_uri=base_uri))
    raw_addresses = data.get('address', '').split(';')
    for address in {part.strip() for part in raw_addresses} - {''}:
        identifier = model.Identifier(ident='', content=address)
        data['contact_point'].append(add_crom_data(data={}, what=identifier))
def populate_destruction_events(self, data: dict, note, *, type_map, location=None):
    '''
    Parse a free-text destruction `note` (e.g. "Destroyed by fire in 1945") and,
    if it matches, model a Destruction event for the object in `data`, with an
    optional causing Event (method), timespan (year), and place (`location`).

    NOTE(review): a near-duplicate of another populate_destruction_events in
    this file, differing only in URI suffixes ('-Destr'/'-Place,' here vs
    '-Destruction'/'-Destruction-Place,') — confirm which form is canonical.
    '''
    destruction_types_map = type_map
    hmo = get_crom_object(data)
    title = data.get('title')
    short_title = truncate_with_ellipsis(title, 100) or title
    # group 1: the cause word after "by"/"during"; group 2: a four-digit year
    r = re.compile(r'[Dd]estroyed(?: (?:by|during) (\w+))?(?: in (\d{4})[.]?)?')
    m = r.search(note)
    if m:
        method = m.group(1)
        year = m.group(2)
        # destruction URI derived from the object URI with a suffix
        dest_id = hmo.id + '-Destr'
        d = model.Destruction(ident=dest_id, label=f'Destruction of “{short_title}”')
        d.referred_to_by = vocab.Note(ident='', content=note)
        if year is not None:
            begin, end = date_cleaner(year)
            ts = timespan_from_outer_bounds(begin, end)
            ts.identified_by = model.Name(ident='', content=year)
            d.timespan = ts
        if method:
            # best-effort: silently skip methods with no mapped destruction type
            with suppress(KeyError, AttributeError):
                type_name = destruction_types_map[method.lower()]
                otype = vocab.instances[type_name]
                event = model.Event(label=f'{method.capitalize()} event causing the destruction of “{short_title}”')
                event.classified_as = otype
                d.caused_by = event
                data['_events'].append(add_crom_data(data={}, what=event))
        if location:
            current = parse_location_name(location, uri_base=self.helper.uid_tag_prefix)
            base_uri = hmo.id + '-Place,'
            place_data = self.helper.make_place(current, base_uri=base_uri)
            place = get_crom_object(place_data)
            if place:
                data['_locations'].append(place_data)
                d.took_place_at = place
        hmo.destroyed_by = d
def model_sojourn(self, data, loc):
    '''
    Build a sojourn data dict for the location record `loc`, appending any
    modeled place to data['_places'] and returning the sojourn dict.
    '''
    base_uri = self.helper.make_proj_uri('PLACE', '')
    cb = data.get('corporate_body', False)
    # corporate bodies are modeled as Establishment, people as Residing
    sojourn_type = vocab.Establishment if cb else vocab.Residing
    sdata = {
        'type': sojourn_type,
        'referred_to_by': [],
    }
    verbatim_date = loc.get('address_date')
    if verbatim_date:
        date_range = date_cleaner(verbatim_date)
        if date_range:
            begin, end = date_range
            ts = timespan_from_outer_bounds(*date_range)
            # keep the verbatim date string as the timespan's name
            ts.identified_by = model.Name(ident='', content=verbatim_date)
            sdata['timespan'] = add_crom_data({'address_date': verbatim_date, 'begin': begin, 'end': end}, ts)
    current = None
    l = loc.get('location')
    if l:
        current = parse_location_name(l, uri_base=self.helper.proj_prefix)
    address = loc.get('address')
    if address:
        # the address becomes a place that is part of the named location (if any)
        current = {
            'name': address,
            'part_of': current,
            'type': 'address',
        }
    for k in ('address_note', 'location_note'):
        note = loc.get(k)
        if note:
            sdata['referred_to_by'].append(vocab.Note(ident='', content=note))
    if current:
        place_data = self.helper.make_place(current, base_uri=base_uri)
        data['_places'].append(place_data)
        sdata['place'] = place_data
    return sdata
def set_lot_objects(self, lot, cno, lno, auction_of_lot_uri, data, sale_type):
    '''Associate the set of objects with the auction lot.'''
    shared_lot_number = self.helper.shared_lot_number_from_lno(lno)
    if sale_type == 'Auction':
        set_class = vocab.AuctionLotSet
    else:
        set_class = vocab.CollectionSet
    label = f'Object Set for Lot {cno} {shared_lot_number}'
    object_set = set_class(ident=f'{auction_of_lot_uri}-Set', label=label)
    object_set.identified_by = model.Name(ident='', content=label)
    # attach whichever price dimensions are present, in this fixed order
    for price_key in ('estimated_price', 'start_price', 'ask_price'):
        price = data.get(price_key)
        if price:
            self.set_possible_attribute(object_set, 'dimension', price)
    lot.used_specific_object = object_set
    data['_lot_object_set'] = add_crom_data(data={}, what=object_set)
def __call__(self, data: dict, non_auctions):
    '''Add modeling for auction catalogs as linguistic objects'''
    cno = data['catalog_number']
    # this information may either come from `data` (for the auction events branch of the pipeline)
    # or from `non_auctions` (for the catalogs branch, which lacks this information,
    # but will have access to the `non_auctions` service which was shared from the events branch)
    sale_type = non_auctions.get(cno, data.get('non_auction_flag'))
    if sale_type:
        non_auctions[cno] = sale_type
    else:
        sale_type = 'Auction'
    catalog = self.helper.catalog_text(cno, sale_type)
    cdata = {'uri': catalog.id}
    puid = data.get('persistent_puid')
    if puid:
        # record the persistent PUID both on the model object and in the data dict
        puid_id = self.helper.gri_number_id(puid)
        catalog.identified_by = puid_id
        cdata['identifiers'] = [puid_id]
    data['_catalog'] = add_crom_data(data=cdata, what=catalog)
    yield data
def add_sellers(self, data: dict, sale_type, transaction, sellers, rel, source=None):
    '''
    Model the provenance entries by which each seller in `sellers` previously
    acquired the object in `data`, appending them to data['_prov_entries'].
    Returns the list of new provenance entry dicts.
    '''
    hmo = get_crom_object(data)
    parent = data['parent_data']
    auction_data = parent['auction_of_lot']
    lot_object_key = object_key(auction_data)
    cno, lno, date = lot_object_key
    lot = get_crom_object(parent.get('_event_causing_prov_entry'))
    # timespan of the event causing this provenance entry, if it has one
    ts = getattr(lot, 'timespan', None)
    prev_procurements = []
    tx_label_args = tuple([self.helper, sale_type, 'Sold', rel] + list(lot_object_key))
    for i, seller_data in enumerate(sellers):
        seller = get_crom_object(seller_data)
        # The provenance entry for a seller's previous acquisition is specific to a
        # single transaction. Therefore, the provenance entry URI must not share a
        # prefix with the object URI, otherwise all such provenance entries are liable
        # to be merged during URI reconciliation as part of the prev/post sale rewriting.
        tx_uri = self.helper.prepend_uri_key(hmo.id, f'PROV,Seller-{i}')
        tx, acq = self.related_procurement(hmo, tx_label_args, current_ts=ts, buyer=seller, previous=True, ident=tx_uri, make_label=prov_entry_label)
        self.attach_source_catalog(data, acq, [seller_data])
        if source:
            tx.referred_to_by = source
        prev_procurements.append(add_crom_data(data={}, what=tx))
    data['_prov_entries'] += prev_procurements
    return prev_procurements
def __call__(self, data: dict):
    '''Add modeling for the entry describing a person/group in the PSCP PEOPLE dataset.'''
    recno = data['star_record_no']
    auth_name = data.get('auth_name')
    entry_uri = self.helper.make_proj_uri('ENTRY', 'PEOPLE', recno)
    content = data['star_csv_data']
    entry = vocab.EntryTextForm(
        ident=entry_uri,
        label=f'Entry recorded in PSCP PEOPLE dataset for {auth_name}',
        content=content)
    # the entry's creation is attributed to the static 'gpi' Group instance
    creation = model.Creation(ident='')
    creation.carried_out_by = self.helper.static_instances.get_instance('Group', 'gpi')
    entry.created_by = creation
    entry.identified_by = self.helper.gpi_number_id(recno, vocab.StarNumber)
    entry.identified_by = vocab.PrimaryName(ident='', content=f'STAR Person Authority Entry {recno}')
    entry.part_of = self.helper.static_instances.get_instance('LinguisticObject', 'db-people')
    data['_entry_record'] = add_crom_data({'uri': entry_uri}, entry)
    yield data
def __call__(self, data: dict, non_auctions):
    '''Add modeling for auction catalogs as linguistic objects'''
    cno = data['catalog_number']
    rec_num = data['star_record_no']
    # this information may either come from `data` (for the auction events branch of the pipeline)
    # or from `non_auctions` (for the catalogs branch, which lacks this information,
    # but will have access to the `non_auctions` service which was shared from the events branch)
    sale_type = non_auctions.get(cno, data.get('non_auction_flag'))
    if sale_type:
        non_auctions[cno] = sale_type
    sale_type = sale_type or 'Auction'
    catalog = self.helper.catalog_text(cno, sale_type)
    # model the source CSV row as a transcription, created by the 'gpi' Group
    content = data['star_csv_data']
    row = vocab.Transcription(ident='', content=content)
    row.part_of = self.helper.static_instances.get_instance('LinguisticObject', 'db-sales_events')
    creation = vocab.TranscriptionProcess(ident='')
    creation.carried_out_by = self.helper.static_instances.get_instance('Group', 'gpi')
    row.created_by = creation
    row.identified_by = self.helper.gpi_number_id(rec_num, vocab.StarNumber)
    # NOTE(review): validation is disabled here, presumably so the non-standard
    # `features_are_also_found_on` property below can be set — confirm
    catalog._validate_profile = False
    catalog.features_are_also_found_on = row
    cdata = {'uri': catalog.id}
    puid = data.get('persistent_puid')
    if puid:
        puid_id = self.helper.gpi_number_id(puid)
        catalog.identified_by = puid_id
        cdata['identifiers'] = [puid_id]
    data['_catalog'] = add_crom_data(data=cdata, what=catalog)
    yield data
def _populate_object_visual_item(self, data: dict, subject_genre):
    '''
    Create a VisualItem for the object in `data`, attach genre/subject
    classifications parsed from the record's 'genre' and 'subject' fields,
    and store it as data['_visual_item'].
    '''
    hmo = get_crom_object(data)
    title = data.get('title')
    title = truncate_with_ellipsis(title, 100) or title
    # visual item URI derived from the object URI with a suffix
    vi_id = hmo.id + '-VisItem'
    vi = model.VisualItem(ident=vi_id)
    vidata = {'uri': vi_id}
    if title:
        vidata['label'] = f'Visual work of “{title}”'
        sales_record = get_crom_object(data['_record'])
        vidata['names'] = [(title, {'referred_to_by': [sales_record]})]
    for key in ('genre', 'subject'):
        if key in data:
            values = [v.strip() for v in data[key].split(';')]
            for value in values:
                for prop, mapping in subject_genre.items():
                    if value in mapping:
                        aat_url = mapping[value]
                        # renamed from `type`, which shadowed the builtin
                        aat_type = model.Type(ident=aat_url, label=value)
                        setattr(vi, prop, aat_type)
    data['_visual_item'] = add_crom_data(data=vidata, what=vi)
    hmo.shows = vi
def _populate_object_catalog_record(self, data: dict, parent, lot, cno, rec_num):
    '''
    Model the catalog record (a paragraph of text) documenting the sale of the
    object in `data`, attach it as data['_record'], and return it.
    '''
    hmo = get_crom_object(data)
    catalog_uri = self.helper.make_proj_uri('CATALOG', cno)
    catalog = vocab.AuctionCatalogText(ident=catalog_uri, label=f'Sale Catalog {cno}')
    record_uri = self.helper.make_proj_uri('CATALOG', cno, 'RECORD', rec_num)
    lot_object_id = parent['lot_object_id']
    puid = parent.get('persistent_puid')
    puid_id = self.helper.gri_number_id(puid)
    record = vocab.ParagraphText(ident=record_uri, label=f'Sale recorded in catalog: {lot_object_id} (record number {rec_num})')
    record_data = {'uri': record_uri}
    record_data['identifiers'] = [
        model.Name(ident='', content=f'Record of sale {lot_object_id}'),
        puid_id
    ]
    record.part_of = catalog
    # record the verbatim transaction type as a property status statement
    if parent.get('transaction'):
        record.referred_to_by = vocab.PropertyStatusStatement(ident='', label='Transaction type for sales record', content=parent['transaction'])
    record.about = hmo
    data['_record'] = add_crom_data(data=record_data, what=record)
    return record
def __call__(self, data: dict, *, attribution_modifiers, attribution_group_types):
    '''
    Add modeling for artists as people involved in the production of an object.

    `attribution_modifiers` maps modifier phrases (e.g. 'style of', 'copy after')
    to sets supporting `.intersects()`; `attribution_group_types` maps modifier
    names to vocab class names used to model groups (e.g. schools/workshops).
    Populates data['_organizations'] and data['_original_objects'] as side effects.
    '''
    hmo = get_crom_object(data)
    data['_organizations'] = []
    data['_original_objects'] = []
    try:
        hmo_label = f'{hmo._label}'
    except AttributeError:
        # the object may not have been given a label yet
        hmo_label = 'object'
    event_id = hmo.id + '-Prod'
    event = model.Production(ident=event_id, label=f'Production event for {hmo_label}')
    hmo.produced_by = event
    artists = data.get('_artists', [])
    sales_record = get_crom_object(data['_record'])
    pi = self.helper.person_identity
    # Normalize each artist record to the keys add_person expects, and pull any
    # biography field out into a BiographyStatement citation.
    for a in artists:
        a.setdefault('referred_to_by', [])
        a.update({
            'pi_record_no': data['pi_record_no'],
            'ulan': a['artist_ulan'],
            'auth_name': a['art_authority'],
            'name': a['artist_name']
        })
        if a.get('biography'):
            bio = a['biography']
            del a['biography']
            cite = vocab.BiographyStatement(ident='', content=bio)
            a['referred_to_by'].append(cite)

    def is_or_anon(data: dict):
        # True for anonymous artist records carrying an 'or' modifier; these
        # are not modeled directly, but make the other attributions uncertain.
        if pi.is_anonymous(data):
            mods = {
                m.lower().strip()
                for m in data.get('attrib_mod_auth', '').split(';')
            }
            return 'or' in mods
        return False
    or_anon_records = [is_or_anon(a) for a in artists]
    # if any artist is an anonymous "or" record, all attributions become possible/uncertain
    uncertain_attribution = any(or_anon_records)

    for seq_no, a in enumerate(artists):
        attribute_assignment_id = event.id + f'-artist-assignment-{seq_no}'
        if is_or_anon(a):
            # do not model the "or anonymous" records; they turn into uncertainty on the other records
            continue
        person = self.helper.add_person(a, record=sales_record, relative_id=f'artist-{seq_no+1}', role='artist')
        artist_label = a.get('role_label')
        mod = a.get('attrib_mod_auth', '')
        mods = CaseFoldingSet({m.strip() for m in mod.split(';')} - {''})
        attrib_assignment_classes = [model.AttributeAssignment]
        if uncertain_attribution or 'or' in mods:
            attrib_assignment_classes.append(vocab.PossibleAssignment)
        if mods:
            # TODO: this should probably be in its own JSON service file:
            STYLE_OF = attribution_modifiers['style of']
            FORMERLY_ATTRIBUTED_TO = attribution_modifiers['formerly attributed to']
            ATTRIBUTED_TO = attribution_modifiers['attributed to']
            COPY_AFTER = attribution_modifiers['copy after']
            PROBABLY = attribution_modifiers['probably by']
            POSSIBLY = attribution_modifiers['possibly by']
            UNCERTAIN = attribution_modifiers['uncertain']
            GROUP_TYPES = set(attribution_group_types.values())
            GROUP_MODS = {
                k
                for k, v in attribution_group_types.items() if v in GROUP_TYPES
            }
            if 'copy by' in mods:
                # equivalent to no modifier
                pass
            elif ATTRIBUTED_TO.intersects(mods):
                # equivalent to no modifier
                pass
            elif STYLE_OF.intersects(mods):
                # "style of X": model as an influence, not a carried_out_by
                assignment = vocab.make_multitype_obj(
                    *attrib_assignment_classes,
                    ident=attribute_assignment_id,
                    label=f'In the style of {artist_label}')
                event.attributed_by = assignment
                assignment.assigned_property = 'influenced_by'
                assignment.property_classified_as = vocab.instances['style of']
                assignment.assigned = person
                continue
            elif mods.intersects(GROUP_MODS):
                # group attribution (e.g. school/workshop of X): create a Group
                # formed under the influence of the person, and attribute the
                # production sub-event to that group.
                mod_name = list(GROUP_MODS & mods)[0]  # TODO: use all matching types?
                clsname = attribution_group_types[mod_name]
                cls = getattr(vocab, clsname)
                group_label = f'{clsname} of {artist_label}'
                group_id = a['uri'] + f'-{clsname}'
                group = cls(ident=group_id, label=group_label)
                formation = model.Formation(
                    ident='', label=f'Formation of {group_label}')
                formation.influenced_by = person
                group.formed_by = formation
                group_data = add_crom_data({'uri': group_id}, group)
                data['_organizations'].append(group_data)
                subevent_id = event_id + f'-{seq_no}'  # TODO: fix for the case of post-sales merging
                subevent = model.Production(
                    ident=subevent_id,
                    label=f'Production sub-event for {group_label}')
                subevent.carried_out_by = group
                if uncertain_attribution:
                    assignment = vocab.make_multitype_obj(
                        *attrib_assignment_classes,
                        ident=attribute_assignment_id,
                        label=f'Possibly attributed to {group_label}')
                    event.attributed_by = assignment
                    assignment.assigned_property = 'part'
                    assignment.assigned = subevent
                else:
                    event.part = subevent
                continue
            elif FORMERLY_ATTRIBUTED_TO.intersects(mods):
                # the {uncertain_attribution} flag does not apply to this branch, because this branch is not making a statement
                # about a previous attribution. the uncertainty applies only to the current attribution.
                assignment = vocab.ObsoleteAssignment(
                    ident=attribute_assignment_id,
                    label=f'Formerly attributed to {artist_label}')
                event.attributed_by = assignment
                assignment.assigned_property = 'carried_out_by'
                assignment.assigned = person
                continue
            elif UNCERTAIN.intersects(mods):
                # "possibly by" / "probably by": model as a possible/probable
                # attribute assignment of carried_out_by.
                if POSSIBLY.intersects(mods):
                    attrib_assignment_classes.append(vocab.PossibleAssignment)
                    assignment = vocab.make_multitype_obj(
                        *attrib_assignment_classes,
                        ident=attribute_assignment_id,
                        label=f'Possibly attributed to {artist_label}')
                    assignment._label = f'Possibly by {artist_label}'
                else:
                    attrib_assignment_classes.append(vocab.ProbableAssignment)
                    assignment = vocab.make_multitype_obj(
                        *attrib_assignment_classes,
                        ident=attribute_assignment_id,
                        label=f'Probably attributed to {artist_label}')
                    assignment._label = f'Probably by {artist_label}'
                event.attributed_by = assignment
                assignment.assigned_property = 'carried_out_by'
                assignment.assigned = person
                continue
            elif COPY_AFTER.intersects(mods):
                # the {uncertain_attribution} flag does not apply to this branch, because this branch is not making a statement
                # about the artist of the work, but about the artist of the original work that this work is a copy of.
                cls = type(hmo)
                original_id = hmo.id + '-Orig'
                original_label = f'Original of {hmo_label}'
                original_hmo = cls(ident=original_id, label=original_label)
                original_event_id = original_hmo.id + '-Prod'
                original_event = model.Production(
                    ident=original_event_id,
                    label=f'Production event for {original_label}')
                original_hmo.produced_by = original_event
                original_subevent_id = original_event_id + f'-{seq_no}'  # TODO: fix for the case of post-sales merging
                original_subevent = model.Production(
                    ident=original_subevent_id,
                    label=f'Production sub-event for {artist_label}')
                original_event.part = original_subevent
                original_subevent.carried_out_by = person
                # the current object's production was influenced by the original
                event.influenced_by = original_hmo
                data['_original_objects'].append(
                    add_crom_data(data={}, what=original_hmo))
                continue
            elif mods & {'or', 'and'}:
                # handled via uncertain_attribution / the default sub-event path below
                pass
            else:
                print(f'UNHANDLED attrib_mod_auth VALUE: {mods}')
                pprint.pprint(a)
                continue
        # Default path: a per-artist production sub-event carried out by the person,
        # wrapped in a possible-assignment when the attribution is uncertain.
        subprod_path = self.helper.make_uri_path(*a["uri_keys"])
        subevent_id = event_id + f'-{subprod_path}'
        subevent = model.Production(
            ident=subevent_id,
            label=f'Production sub-event for {artist_label}')
        subevent.carried_out_by = person
        if uncertain_attribution or 'or' in mods:
            assignment = vocab.make_multitype_obj(
                *attrib_assignment_classes,
                ident=attribute_assignment_id,
                label=f'Possibly attributed to {artist_label}')
            event.attributed_by = assignment
            assignment.assigned_property = 'part'
            assignment.assigned = subevent
        else:
            event.part = subevent
    # drop the "or anonymous" records from downstream processing
    data['_artists'] = [a for a in artists if not is_or_anon(a)]
    return data
def __call__(self, data: dict, event_properties, date_modifiers):
    '''
    Add modeling data for an auction event.

    Attaches the event location, timespan, experts, commissaires-priseurs,
    notes, and external links to the auction object in `data`, and records
    shared state (locations, experts, commissaires) in `event_properties`
    for use by later pipeline components.
    '''
    cno = data['catalog_number']
    auction_locations = event_properties['auction_locations']
    event_experts = event_properties['experts']
    event_commissaires = event_properties['commissaire']
    auction = get_crom_object(data)
    catalog = data['_catalog']['_LOD_OBJECT']

    location_data = data['location']
    current = self.auction_event_location(location_data)
    if not current:
        print(f'*** Empty location data: {pprint.pformat(location_data)}')
        pprint.pprint(data)

    # helper.make_place is called here instead of using make_la_place as a separate
    # graph node because the Place object gets stored in the `auction_locations`
    # object to be used in the second graph component which uses the data to
    # associate the place with auction lots.
    base_uri = self.helper.make_proj_uri('AUCTION-EVENT', cno, 'PLACE', '')
    record = get_crom_object(data.get('_record'))

    # Build a 'City, Country'-style string from the location hierarchy so it can
    # be matched against the canonical places service.
    current_p = current
    locs = []
    while current_p:
        l = current_p.get('name')
        if l:
            locs.append(l)
        current_p = current_p.get('part_of')
    loc = ', '.join(locs) if locs else None
    canonical_place = self.helper.get_canonical_place(loc)
    if canonical_place:
        place = canonical_place
        place_data = add_crom_data(data={'uri': place.id}, what=place)
    else:
        place_data = self.helper.make_place(current, base_uri=base_uri, record=record)
        place = get_crom_object(place_data)
    if place:
        data['_locations'] = [place_data]
        auction.took_place_at = place
        auction_locations[cno] = place.clone(minimal=True)

    ts, begin, end = timespan_from_bound_components(
        data, date_modifiers,
        'sale_begin_', 'begin',
        'sale_end_', 'eoe'
    )

    event_record = get_crom_object(data['_record'])
    for seq_no, expert in enumerate(data.get('expert', [])):
        # fix: these two loops previously ended the copy_source_information call
        # with a stray trailing comma, creating a throwaway 1-tuple expression
        self.helper.copy_source_information(expert, data)
        person = self.helper.add_person(
            expert,
            record=event_record,
            relative_id=f'expert-{seq_no+1}',
            role='expert'
        )
        event_experts[cno].append(person.clone(minimal=True))
        data['_organizers'].append(add_crom_data(data={}, what=person))
        role_id = ''  # self.helper.make_proj_uri('AUCTION-EVENT', cno, 'Expert', seq_no)
        role = vocab.Expert(ident=role_id, label=f'Role of Expert in the event {cno}')
        role.carried_out_by = person
        auction.part = role
    for seq_no, commissaire in enumerate(data.get('commissaire', [])):
        self.helper.copy_source_information(commissaire, data)
        person = self.helper.add_person(
            commissaire,
            record=event_record,
            relative_id=f'commissaire-{seq_no+1}',
            role='commissaire'
        )
        event_commissaires[cno].append(person.clone(minimal=True))
        data['_organizers'].append(add_crom_data(data={}, what=person))
        role_id = ''  # self.helper.make_proj_uri('AUCTION-EVENT', cno, 'Commissaire', seq_no)
        role = vocab.CommissairePriseur(
            ident=role_id, label=f'Role of Commissaire-priseur in the event {cno}')
        role.carried_out_by = person
        auction.part = role

    notes = data.get('notes')
    if notes:
        auction.referred_to_by = vocab.Note(ident='', content=notes)

    if 'links' in data:
        event_record = get_crom_object(data['_record'])
        links = data['links']
        link_keys = set(links.keys()) - {'portal'}
        for p in links.get('portal', []):
            url = p['portal_url']
            if url.startswith('http'):
                event_record.referred_to_by = vocab.WebPage(ident=url, label=url)
            else:
                warnings.warn(f'*** Portal URL value does not appear to be a valid URL: {url}')
        for k in link_keys:
            url = links[k]
            if isinstance(url, str):
                event_record.referred_to_by = vocab.WebPage(ident=url, label=url)
            else:
                print(f'*** not a URL string: {k}: {url}')

    if ts:
        auction.timespan = ts

    auction.referred_to_by = catalog
    return data
def make_place(self, data: dict, base_uri=None):
    '''
    Given a dictionary representing data about a place, construct a model.Place object,
    assign it as the crom data in the dictionary, and return the dictionary.

    The dictionary keys used to construct the place object are:

    - name
    - type (one of: 'City', 'State', 'Province', 'Country', or 'Sovereign')
    - part_of (a recursive place dictionary)

    If the name matches a known unique location (derived from the unique_locations
    service data), the normal recursive handling of part_of data is bypassed, using
    the canonical name/static-instance data instead.
    '''
    canonical_location_names = self.canonical_location_names
    TYPES = {
        'city': vocab.instances['city'],
        'county': vocab.instances['county'],
        'province': vocab.instances['province'],
        'state': vocab.instances['province'],
        'country': vocab.instances['nation'],
        'sovereign': vocab.instances['sovereign'],
    }

    if data is None:
        return None
    type_name = data.get('type', 'place').lower()

    name = data.get('name')
    si = self.static_instances

    names = data.get('names', [])
    label = name
    parent_data = data.get('part_of')

    place_type = TYPES.get(type_name)
    parent = None
    # fix: guard against a missing name — previously `name.casefold()` raised
    # AttributeError on None, even though the missing-name case is explicitly
    # handled (with a warning) further below.
    if name and name.casefold() in canonical_location_names:
        name = canonical_location_names.get(name.casefold(), name)
        label = name
    elif parent_data:
        parent_data = self.make_place(parent_data, base_uri=base_uri)
        parent = get_crom_object(parent_data)
        if label:
            label = f'{label}, {parent._label}'

    placeargs = {}
    p = None
    if si:
        p = si.get_instance('Place', name)
        if not p:
            p = si.get_instance('Place', label)

        if p:
            # This is a static instance. We need to re-thread the part_of relationship
            # in the data dictionary, because the serialization depends on the dictionary
            # data, not the properties of the modeled object.
            add_crom_data(data=data, what=p)
            queue = [data]
            while queue:
                place_data = queue.pop(0)
                place = get_crom_object(place_data)
                parents = getattr(place, 'part_of', []) or []
                if parents:
                    for parent in parents:
                        if parent:
                            if 'part_of' not in place_data:
                                parent_data = add_crom_data(data={}, what=parent)
                                place_data['part_of'] = parent_data
                            else:
                                parent_data = add_crom_data(
                                    data=place_data['part_of'], what=parent)
                            queue.append(parent_data)
                elif 'part_of' in place_data:
                    parent_data = self.make_place(place_data['part_of'], base_uri=base_uri)
                    queue.append(parent_data)
    if p:
        return data

    if label:
        placeargs['label'] = label
    if data.get('uri'):
        placeargs['ident'] = data['uri']
    elif base_uri and label:
        # fix: only mint a URI from the label when a label exists; quoting None
        # would raise TypeError.
        data['uri'] = base_uri + urllib.parse.quote(label)
        placeargs['ident'] = data['uri']

    # p is always None here (the static-instance case returned above)
    p = model.Place(**placeargs)
    if place_type:
        p.classified_as = place_type
    if name:
        p.identified_by = vocab.PrimaryName(ident='', content=name)
    else:
        warnings.warn(f'Place with missing name on {p.id}')
    # fix: use a distinct loop variable; this loop previously shadowed `name`
    for alt_name in names:
        if alt_name:
            p.identified_by = model.Name(ident='', content=alt_name)
    if parent:
        p.part_of = parent
        data['part_of'] = parent_data
    return add_crom_data(data=data, what=p)
def handle_prev_post_owner(self, data, hmo, tx_data, sale_type, lot_object_key, owner_record, record_id, rev, ts=None, make_label=None):
    '''
    Model a previous (`rev=True`) or subsequent (`rev=False`) owner of the object
    `hmo`, linked to the current transaction `tx_data` via a related provenance
    entry appended to data['_prov_entries'].

    :param ts: optional timespan for the related procurement
    :param make_label: optional label-making callable passed to related_procurement
    '''
    current_tx = get_crom_object(tx_data)
    sales_record = get_crom_object(data['_record'])
    if rev:
        rel = f'leading to the previous ownership of'
        source_label = 'Source of information on history of the object prior to the current sale.'
    else:
        rel = f'leading to the subsequent ownership of'
        source_label = 'Source of information on history of the object after the current sale.'
    owner_record.update({
        'pi_record_no': data['pi_record_no'],
        # owner ULAN may appear under either key in the source record
        'ulan': owner_record.get('ulan', owner_record.get('own_ulan')),
    })
    # NOTE(review): role='artist' for an owner record looks suspicious, but may
    # affect URI generation — confirm before changing.
    self.add_person(owner_record, record=sales_record, relative_id=record_id, role='artist')
    owner = get_crom_object(owner_record)

    # TODO: handle other fields of owner_record: own_auth_d, own_auth_q, own_ques, own_so

    # own_auth_l: the owner's authoritative location, modeled as a residence
    if owner_record.get('own_auth_l'):
        loc = owner_record['own_auth_l']
        current = parse_location_name(loc, uri_base=self.helper.uid_tag_prefix)
        place_data = self.helper.make_place(current)
        place = get_crom_object(place_data)
        owner.residence = place
        data['_owner_locations'].append(place_data)

    # own_auth_p: free-text note about the owner
    if owner_record.get('own_auth_p'):
        content = owner_record['own_auth_p']
        owner.referred_to_by = vocab.Note(ident='', content=content)

    data.setdefault('_other_owners', [])
    data['_other_owners'].append(owner_record)

    # The Provenance Entry URI must not share a prefix with the object URI, otherwise
    # we run the risk of provenance entries being accidentally merged during URI
    # reconciliation as part of the prev/post sale rewriting.
    tx_uri = self.helper.prepend_uri_key(hmo.id, f'PROV-{record_id}')
    tx_label_args = tuple([self.helper, sale_type, 'Sold', rel] + list(lot_object_key))
    tx, _ = self.related_procurement(hmo, tx_label_args, current_tx, ts, buyer=owner, previous=rev, ident=tx_uri, make_label=make_label)
    # own_auth_e: note attached to the provenance entry itself
    if owner_record.get('own_auth_e'):
        content = owner_record['own_auth_e']
        tx.referred_to_by = vocab.Note(ident='', content=content)
    own_info_source = owner_record.get('own_so')
    if own_info_source:
        note = vocab.SourceStatement(ident='', content=own_info_source, label=source_label)
        tx.referred_to_by = note
    ptx_data = tx_data.copy()
    data['_prov_entries'].append(add_crom_data(data=ptx_data, what=tx))