Пример #1
0
    def make_place(self, data: dict, base_uri=None):
        '''
        Given a dictionary representing data about a place, construct a model.Place
        object, assign it as the crom data in the dictionary (via add_crom_data), and
        return the result of that call.

        The dictionary keys used to construct the place object are:

        - name
        - type (one of: 'City', 'State', 'Province', or 'Country'; matched
          case-insensitively, anything else leaves the place unclassified)
        - part_of (a recursive place dictionary)
        - uri (optional; used as the place identifier when present and truthy)

        When no `uri` is supplied, the identifier is taken from
        `self.make_proj_uri('PLACE', label)` if the label is in
        `self.unique_locations`, otherwise from `base_uri` plus the URL-quoted label
        (only when both are available). In both cases the chosen URI is also written
        back to `data['uri']`. If none of these apply, no `ident` is passed to
        model.Place.

        Returns None when `data` is None. A place without a name triggers a warning
        rather than an error.
        '''
        # Bail out before touching any other state; there is nothing to model.
        if data is None:
            return None

        unique_locations = self.unique_locations
        place_types = {
            'city': vocab.instances['city'],
            'province': vocab.instances['province'],
            'state': vocab.instances['province'],
            'country': vocab.instances['nation'],
        }

        # `or 'place'` also covers an explicit `'type': None` entry, which would
        # otherwise fail on `.lower()`.
        type_name = (data.get('type') or 'place').lower()
        place_type = place_types.get(type_name)

        name = data.get('name')
        label = name
        parent_data = data.get('part_of')

        parent = None
        if parent_data:
            # Model the enclosing place first so its label can qualify this one.
            parent_data = self.make_place(parent_data, base_uri=base_uri)
            parent = get_crom_object(parent_data)
            if label:
                label = f'{label}, {parent._label}'

        placeargs = {}
        if label:
            placeargs['label'] = label
        if data.get('uri'):
            placeargs['ident'] = data['uri']
        elif label in unique_locations:
            data['uri'] = self.make_proj_uri('PLACE', label)
            placeargs['ident'] = data['uri']
        elif base_uri and label:
            # Without a label there is nothing to append to base_uri, and
            # urllib.parse.quote(None) would raise TypeError.
            data['uri'] = base_uri + urllib.parse.quote(label)
            placeargs['ident'] = data['uri']

        p = model.Place(**placeargs)
        if place_type:
            p.classified_as = place_type
        if name:
            p.identified_by = model.Name(ident='', content=name)
        else:
            warnings.warn(f'Place with missing name on {p.id}')
        if parent:
            p.part_of = parent
            # Store the modeled parent dictionary back on the data.
            data['part_of'] = parent_data
        return add_crom_data(data=data, what=p)
Пример #2
0
 def nation(self, name, ident):
     '''
     Build and return a place for the country called `name`.

     The place is created with vocab.Place, labelled with `name`, classified as
     `vocab.instances['nation']`, and given a model.Name whose content is `name`.
     Its identifier is a fixed `tag:` prefix with `name` appended.

     NOTE(review): `ident` is accepted for interface compatibility but is not used
     for the returned place. The previous body built a throwaway
     `model.Place(ident=ident)` that was never returned; confirm whether the
     returned place was meant to use `ident` instead of the tag URI.
     NOTE(review): the URI prefix contains the literal text 'REPLACE-WITH-UUID',
     which looks like a placeholder -- confirm before relying on these URIs.
     '''
     place = vocab.Place(
         ident=
         'tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-'
         + name,
         label=name)
     place.classified_as = vocab.instances['nation']
     place.identified_by = model.Name(ident='', content=name)
     return place
Пример #3
0
    def set_properties(self, data, thing):
        '''
        Copy person/group-style properties from the `data` dictionary onto `thing`.

        After delegating to the parent class implementation, this reads the
        following keys from `data`:

        - ulan: if it converts to a non-zero int, added as an exact match to the
          corresponding http://vocab.getty.edu/ulan/ URI
        - name: either a string, or a list/tuple whose first element is the name
          content and whose remaining elements are passed to `set_lo_properties`
        - exact_match: iterable of values assigned to `thing.exact_match`
        - sojourns: iterable of dictionaries, each modeled as an activity carried
          out by `thing`
        - places: iterable of model.Place objects, dictionaries carrying crom
          data, or plain labels; each is assigned to `thing.residence`
        '''
        super().set_properties(data, thing)

        # A missing or non-numeric ULAN value is skipped silently.
        with suppress(ValueError, TypeError):
            ulan_id = int(data.get('ulan'))
            if ulan_id:
                thing.exact_match = model.BaseResource(
                    ident=f'http://vocab.getty.edu/ulan/{ulan_id}')

        if 'name' in data:
            title_type = model.Type(
                ident='http://vocab.getty.edu/aat/300417193', label='Title')
            name = data['name']
            if name and isinstance(name, str):
                set_la_name(thing, name, title_type, set_label=True)
            elif name and isinstance(name, (list, tuple)):
                # First element is the name text; the rest are extra properties.
                content, *extras = name
                title = model.Name(ident='', content=content)
                title.classified_as = title_type
                self.set_lo_properties(title, *extras)
                thing.identified_by = title

        for match in data.get('exact_match', []):
            thing.exact_match = match

        for sojourn in data.get('sojourns', []):
            activity_label = sojourn.get('label', 'Sojourn activity')
            activity_class = sojourn.get('type', model.Activity)
            activity = activity_class(ident='', label=activity_label)
            when, where = (get_crom_object(sojourn.get('timespan')),
                           get_crom_object(sojourn.get('place')))
            activity.timespan = when
            activity.took_place_at = where
            thing.carried_out = activity
            self.set_referred_to_by(sojourn, activity)

        # Locations are names of residence places (P74 -> E53)
        # XXX FIXME: Places are their own model
        if 'places' in data:
            for entry in data['places']:
                if isinstance(entry, model.Place):
                    residence = entry
                elif isinstance(entry, dict):
                    residence = get_crom_object(entry)
                else:
                    # Anything else is used as the label of a new place.
                    residence = model.Place(ident='', label=entry)
                thing.residence = residence
Пример #4
0
    def model_publisher_group(self, record, data, seq):
        '''
        Model one publisher entry as a vocab.Publishing activity and append it to
        `record['_publishings']` (created as an empty list if absent).

        `record` must provide 'label' and 'uri'. The activity URI is the record
        URI followed by `-pub-{seq}`. From `data`, 'gaia_corp_id' (if truthy) sets
        who carried out the activity, and 'gaia_geog_id' (if truthy) sets where it
        took place; both are resolved to URIs through `self.helper`.
        '''
        publishings = record.setdefault('_publishings', [])

        series_label = record['label']
        corp_id = data.get('gaia_corp_id')
        geog_id = data.get('gaia_geog_id')

        activity_uri = record['uri'] + f'-pub-{seq}'
        # Name the corporate body in the label when known, else the sequence number.
        if corp_id:
            agent_suffix = f' by CB{corp_id}'
        else:
            agent_suffix = f' by publisher #{seq}'
        activity_label = f'Publishing of {series_label}' + agent_suffix
        publishing = vocab.Publishing(ident=activity_uri, label=activity_label)

        if corp_id:
            group_uri = self.helper.corporate_body_uri(corp_id)
            publishing.carried_out_by = model.Group(ident=group_uri)
        if geog_id:
            place_uri = self.helper.place_uri(geog_id)
            publishing.took_place_at = model.Place(ident=place_uri)

        publishings.append(publishing)
Пример #5
0
    def make_place(self, data: dict, base_uri=None):
        '''
        Given a dictionary representing data about a place, construct a model.Place
        object, assign it as the crom data in the dictionary (via add_crom_data), and
        return the result of that call.

        The dictionary keys used to construct the place object are:

        - name
        - names (optional iterable of additional names)
        - type (one of: 'City', 'County', 'State', 'Province', 'Country', or
          'Sovereign'; matched case-insensitively)
        - part_of (a recursive place dictionary)
        - uri (optional; used as the place identifier when present and truthy)

        If the case-folded name is a key of `self.canonical_location_names`, the
        name and label are replaced by the canonical name from that mapping and the
        normal recursive handling of the part_of data is bypassed.

        If `self.static_instances` has a 'Place' instance for the name (or, failing
        that, for the label), that instance is used instead of building a new place,
        and the part_of chain in the data dictionary is re-threaded to match it.

        Returns None when `data` is None. A place without a name triggers a warning
        rather than an error.
        '''
        # Bail out before touching any other state; there is nothing to model.
        if data is None:
            return None

        canonical_location_names = self.canonical_location_names
        place_types = {
            'city': vocab.instances['city'],
            'county': vocab.instances['county'],
            'province': vocab.instances['province'],
            'state': vocab.instances['province'],
            'country': vocab.instances['nation'],
            'sovereign': vocab.instances['sovereign'],
        }

        # `or 'place'` also covers an explicit `'type': None` entry.
        type_name = (data.get('type') or 'place').lower()
        place_type = place_types.get(type_name)

        name = data.get('name')
        si = self.static_instances

        alternate_names = data.get('names') or []
        label = name
        parent_data = data.get('part_of')

        parent = None

        # A missing name cannot be case-folded; treat it as "not canonical" so the
        # missing-name warning further down is reached instead of an AttributeError.
        name_key = name.casefold() if isinstance(name, str) else None
        if name_key is not None and name_key in canonical_location_names:
            name = canonical_location_names.get(name_key, name)
            label = name
        elif parent_data:
            parent_data = self.make_place(parent_data, base_uri=base_uri)
            parent = get_crom_object(parent_data)
            if label:
                label = f'{label}, {parent._label}'

        if si:
            p = si.get_instance('Place', name)
            if not p:
                p = si.get_instance('Place', label)

            if p:
                # this is a static instance. we need to re-thread the part_of relationship
                # in the data dictionary, because the serialization depends on the dictionary
                # data, not the properties of the modeled object
                add_crom_data(data=data, what=p)
                queue = [data]
                while queue:
                    place_data = queue.pop(0)
                    place = get_crom_object(place_data)
                    static_parents = getattr(place, 'part_of', []) or []
                    if static_parents:
                        for static_parent in static_parents:
                            if not static_parent:
                                continue
                            if 'part_of' not in place_data:
                                static_parent_data = add_crom_data(
                                    data={}, what=static_parent)
                                place_data['part_of'] = static_parent_data
                            else:
                                static_parent_data = add_crom_data(
                                    data=place_data['part_of'],
                                    what=static_parent)
                            queue.append(static_parent_data)
                    elif 'part_of' in place_data:
                        # The modeled object has no parents, but the data does:
                        # model them the ordinary way.
                        modeled_parent = self.make_place(
                            place_data['part_of'], base_uri=base_uri)
                        # make_place returns None for a None part_of entry; there
                        # is nothing further to walk in that case.
                        if modeled_parent is not None:
                            queue.append(modeled_parent)
                return data

        placeargs = {}
        if label:
            placeargs['label'] = label

        if data.get('uri'):
            placeargs['ident'] = data['uri']
        elif base_uri and label:
            # Without a label there is nothing to append to base_uri, and
            # urllib.parse.quote(None) would raise TypeError.
            data['uri'] = base_uri + urllib.parse.quote(label)
            placeargs['ident'] = data['uri']

        p = model.Place(**placeargs)
        if place_type:
            p.classified_as = place_type
        if name:
            p.identified_by = vocab.PrimaryName(ident='', content=name)
        else:
            warnings.warn(f'Place with missing name on {p.id}')
        for alternate in alternate_names:
            if alternate:
                p.identified_by = model.Name(ident='', content=alternate)
        if parent:
            p.part_of = parent
            # Store the modeled parent dictionary back on the data.
            data['part_of'] = parent_data
        return add_crom_data(data=data, what=p)
Пример #6
0
    def model_imprint_group(self, record, data):
        '''
        Model the imprint information in `data` onto the `record` dictionary.

        Does nothing (and leaves `record` untouched) when `data` is empty or None.
        Otherwise the list-valued keys 'referred_to_by', 'used_for', 'part_of',
        '_activities', '_groups', '_places' and 'identifiers' are created on
        `record` if absent, and then:

        - edition, series_number, website_address and thesis_degree are appended
          to 'referred_to_by' as statements/notes
        - doi, coden and technical_report_number are appended to 'identifiers'
        - each publisher / distributor becomes a Publishing / Distributing
          activity appended to 'used_for'; its group and place (when their ids are
          present) are also appended to '_groups' and '_places'
        - journal_info adds the issue to 'part_of'

        `record` must provide 'label' and 'uri'.
        '''
        if not data:
            return
        for key in ('referred_to_by', 'used_for', 'part_of', '_activities',
                    '_groups', '_places', 'identifiers'):
            record.setdefault(key, [])

        edition = data.get('edition')
        series_number = data.get('series_number')
        doi = data.get('doi')
        coden = data.get('coden')
        website = data.get('website_address')
        publishers = _as_list(data.get('publisher'))
        distributors = _as_list(data.get('distributor'))
        journal = data.get('journal_info')
        # imprint_group/journal_info/aata_journal_id
        # imprint_group/journal_info/aata_issue_id
        degree = data.get('thesis_degree')
        tr = data.get('technical_report_number')

        if edition:
            record['referred_to_by'].append(
                vocab.EditionStatement(ident='', content=edition))

        if series_number:
            record['referred_to_by'].append(
                vocab.Note(ident='',
                           content=series_number))  # TODO: classify this Note

        if doi:
            record['identifiers'].append(
                vocab.DoiIdentifier(ident='', content=doi))

        if coden:
            record['identifiers'].append(
                vocab.CodenIdentifier(ident='', content=coden))

        if website:
            record['referred_to_by'].append(
                vocab.Note(ident='', content=website))

        article_label = record['label']

        def add_activities(entries, location_key, uri_suffix, activity_class,
                           label):
            # Shared modeling for publishers and distributors: one activity per
            # entry, linked to its corporate body and place when ids are given.
            for i, entry in enumerate(entries):
                corp_id = entry.get('gaia_corp_id')
                # `or {}` tolerates a location key that is present but None.
                geog_id = (entry.get(location_key) or {}).get('gaia_geog_id')
                a_uri = record['uri'] + f'-{uri_suffix}-{i}'
                a = activity_class(ident=a_uri, label=label)
                if corp_id:
                    uri = self.helper.corporate_body_uri(corp_id)
                    g = model.Group(ident=uri)
                    a.carried_out_by = g
                    record['_groups'].append(add_crom_data({}, g))
                if geog_id:
                    uri = self.helper.place_uri(geog_id)
                    p = model.Place(ident=uri)
                    a.took_place_at = p
                    record['_places'].append(add_crom_data({}, p))
                record['used_for'].append(a)

        add_activities(publishers, 'publisher_location', 'pub',
                       vocab.Publishing, f'Publishing of {article_label}')
        add_activities(distributors, 'distributor_location', 'dist',
                       vocab.Distributing, f'Distribution of {article_label}')

        if journal:
            journal_id = journal.get('aata_journal_id')
            issue_id = journal.get('aata_issue_id')
            issue_uri = self.helper.issue_uri(journal_id, issue_id)
            issue = vocab.IssueText(ident=issue_uri)
            record['part_of'].append(add_crom_data({'uri': issue_uri}, issue))

        if degree:
            record['referred_to_by'].append(
                vocab.Note(ident='', content=degree))

        if tr:
            record['identifiers'].append(model.Identifier(
                ident='', content=tr))  # TODO: classify this Identifier
Пример #7
0
    def setup_static_instances(self):
        '''
        Build the instances that the code refers to statically.

        Some records (for example the Group or Person to which an identifier or a
        Lugt number is attributed) must be serialized even when they never appear
        in the source data. This method constructs those objects and returns them
        in a defaultdict keyed by model type name ('LinguisticObject', 'Group',
        'Person', 'Material', 'Place'), each mapping a short key to the instance.
        '''
        lugt_ulan = 500321736
        gri_ulan = 500115990
        gci_ulan = 500115991
        knoedler_ulan = 500304270

        helper = self.helper
        pscp_uri = helper.make_shared_uri(
            'STATIC', 'ORGANIZATION',
            'Project for the Study of Collecting and Provenance')
        gpi_uri = helper.make_shared_uri('STATIC', 'ORGANIZATION',
                                         'Getty Provenance Index')
        gri_uri = helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', 'JPGM')
        gci_uri = helper.make_shared_uri('STATIC', 'ORGANIZATION',
                                         'Getty Conservation Institute')
        lugt_uri = helper.make_proj_uri('PERSON', 'ULAN', lugt_ulan)
        knoedler_uri = helper.make_shared_uri('ORGANIZATION', 'ULAN',
                                              str(knoedler_ulan))
        newyork_uri = helper.make_shared_uri('PLACE', 'USA', 'NY', 'New York')

        def build(cls, uri, name, ulan=None):
            # Create an entity labelled and primarily named `name`; when a ULAN id
            # is given, also link it to the matching ULAN vocabulary URI.
            entity = cls(ident=uri, label=name)
            entity.identified_by = vocab.PrimaryName(ident='', content=name)
            if ulan is not None:
                entity.exact_match = model.BaseResource(
                    ident=f'http://vocab.getty.edu/ulan/{ulan}')
            return entity

        gci = build(model.Group, gci_uri, 'Getty Conservation Institute',
                    gci_ulan)
        gri = build(model.Group, gri_uri, 'Getty Research Institute', gri_ulan)
        gpi = build(model.Group, gpi_uri, 'Getty Provenance Index')
        pscp = build(model.Group, pscp_uri,
                     'Project for the Study of Collecting and Provenance')
        lugt = build(model.Person, lugt_uri, 'Frits Lugt', lugt_ulan)
        knoedler = build(model.Group, knoedler_uri, 'M. Knoedler & Co.',
                         knoedler_ulan)
        newyork = build(model.Place, newyork_uri, 'New York, NY')

        # Materials come from the optional 'materials' service (AAT id -> label).
        if 'materials' in self.services:
            materials = {
                aat: model.Material(ident=f'http://vocab.getty.edu/aat/{aat}',
                                    label=label)
                for aat, label in self.services['materials'].items()
            }
        else:
            materials = {}

        places = self._static_place_instances()
        places.update({'newyork': newyork})

        linguistic_objects = {
            'db-people': self.static_db_instance(
                'PEOPLE', name='STAR Person Authority Database', creator=gpi),
            'db-knoedler': self.static_db_instance(
                'Knoedler', name='STAR Knoedler Database', creator=gpi),
            'db-sales_events': self.static_db_instance(
                'Sales', 'Descriptions',
                name='STAR Sales Catalogue Database', creator=gpi),
            'db-sales_catalogs': self.static_db_instance(
                'Sales', 'Catalogue',
                name='STAR Physical Sales Catalogue Database', creator=gpi),
            'db-sales_contents': self.static_db_instance(
                'Sales', 'Contents',
                name='STAR Sales Contents Database', creator=gpi),
        }

        instances = defaultdict(dict)
        instances['LinguisticObject'] = linguistic_objects
        instances['Group'] = {
            'gci': gci,
            'pscp': pscp,
            'gri': gri,
            'gpi': gpi,
            'knoedler': knoedler,
        }
        instances['Person'] = {'lugt': lugt}
        instances['Material'] = materials
        instances['Place'] = places

        return instances