示例#1
0
 def test_init_params(self):
     '''Constructor keyword arguments must surface on the expected attributes.'''
     # (class, constructor kwargs, attribute to read back, expected value)
     cases = (
         (model.Person, {'ident': "urn:uuid:1234"}, 'id', "urn:uuid:1234"),
         (model.Person, {'ident': "http://schema.org/Foo"}, 'id', "schema:Foo"),
         (model.Name, {'content': "Test"}, 'content', "Test"),
         (model.MonetaryAmount, {'value': 10}, 'value', 10),
         # `value=` is expected to be readable back through `content`.
         (model.Name, {'value': "Rob"}, 'content', "Rob"),
         (model.Identifier, {'content': "xyz123"}, 'content', "xyz123"),
         (model.Identifier, {'value': "abc"}, 'content', "abc"),
     )
     for cls, ctor_kwargs, attr, expected in cases:
         built = cls(**ctor_kwargs)
         self.assertEqual(getattr(built, attr), expected)
示例#2
0
	def __call__(self, data: dict, event_properties, date_modifiers):
		'''
		Add modeling for an auction event based on properties of the supplied `data` dict.

		The sale timespan is built from `data` using the `sale_begin_` and `sale_end_`
		key prefixes, and is recorded (together with its label) in
		`event_properties['auction_dates']` and `event_properties['auction_date_label']`
		under the catalog number.

		`data` is updated in place with `uid`, `uri`, the crom event object and
		`_record` (an alias of `_catalog`), and is returned.
		'''
		record = get_crom_object(data['_catalog'])
		cno = data['catalog_number']
		sale_type = data.get('non_auction_flag', 'Auction')

		ts, begin, end = timespan_from_bound_components(
			data,
			date_modifiers,
			'sale_begin_', 'begin',
			'sale_end_', 'eoe'
		)

		# Publish the dates so that other services can look them up by catalog number.
		event_properties['auction_dates'][cno] = (ts, begin, end)
		event_properties['auction_date_label'][cno] = ts._label

		event_date_label = event_properties['auction_date_label'].get(cno)
		auction, uid, uri = self.helper.sale_event_for_catalog_number(cno, sale_type, date_label=event_date_label)
		auction.referred_to_by = record
		auction.identified_by = model.Name(ident='', content=auction._label)
		data['uid'] = uid
		data['uri'] = uri
		add_crom_data(data=data, what=auction)

		# The catalog entry doubles as the record for this event.
		data['_record'] = data['_catalog']
		return data
示例#3
0
    def model_person_or_group(self,
                              data: dict,
                              a: dict,
                              attribution_group_types,
                              attribution_group_names,
                              role='artist',
                              seq_no=0,
                              sales_record=None):
        '''
        Model the attributed party `a` either as a person or, when its modifiers
        match a group attribution, as a group formed under that person's influence.

        If `get_crom_object(a)` is already truthy, `a` is returned untouched.
        Otherwise a person is added via `self.helper.add_person`. When one of
        `a['modifiers']` is a key of `attribution_group_types`, a group object is
        built, appended to `data['_organizations']`, and its data dict is returned
        (in this case `a['label']` is overwritten with the group label). In every
        other case `a` is returned with the person attached as its crom object.
        '''
        if get_crom_object(a):
            return a

        mods = a['modifiers']

        artist = self.helper.add_person(a,
                                        record=sales_record,
                                        relative_id=f'artist-{seq_no+1}',
                                        role=role)
        artist_label = a['label']
        person = get_crom_object(a)

        if mods:
            GROUP_TYPES = set(attribution_group_types.values())
            # NOTE(review): every value of attribution_group_types is in GROUP_TYPES by
            # construction, so this filter keeps all keys of the mapping.
            GROUP_MODS = {
                k
                for k, v in attribution_group_types.items() if v in GROUP_TYPES
            }

            # NOTE(review): `intersects` is not a builtin set method; `mods` is
            # presumably a project-specific set type -- confirm against the caller.
            if mods.intersects(GROUP_MODS):
                mod_name = list(GROUP_MODS
                                & mods)[0]  # TODO: use all matching types?
                clsname = attribution_group_types[mod_name]
                cls = getattr(vocab, clsname)
                group_name = attribution_group_names[clsname]
                group_label = f'{group_name} {artist_label}'
                a['label'] = group_label
                # The group URI is just the person URI with a suffix. In any case
                # where the person is merged, the group should be merged as well.
                # For example, if "RUBENS" is merged, "School of RUBENS" should
                # also be merged.
                group_id = a['uri'] + f'-{clsname}'
                group = cls(ident=group_id, label=group_label)
                group.identified_by = model.Name(ident='', content=group_label)
                # The group is modeled as having been formed under the influence
                # of the named person.
                formation = model.Formation(
                    ident='', label=f'Formation of {group_label}')
                formation.influenced_by = person
                group.formed_by = formation
                pi_record_no = data['pi_record_no']
                group_uri_key = ('GROUP', 'PI', pi_record_no, f'{role}Group')
                group_data = {
                    'uri': group_id,
                    'uri_keys': group_uri_key,
                    'modifiers': mods,
                    'label': group_label
                }
                add_crom_data(group_data, group)
                data['_organizations'].append(group_data)
                return group_data

        # No group modifier matched: the party is the person itself.
        add_crom_data(a, artist)
        return a
示例#4
0
 def test_init_params(self):
     '''Identifiers and content given to constructors must be readable back.'''
     # A URN identifier is expected to be kept verbatim.
     urn_person = model.Person(ident="urn:uuid:1234")
     self.assertEqual(urn_person.id, "urn:uuid:1234")

     # A schema.org URI is expected to be exposed in its prefixed form.
     schema_person = model.Person(ident="http://schema.org/Foo")
     self.assertEqual(schema_person.id, "schema:Foo")

     named = model.Name(content="Test")
     self.assertEqual(named.content, "Test")
示例#5
0
    def professional_activity(self,
                              name: str,
                              century=None,
                              date_range=None,
                              classified_as=None,
                              **kwargs):
        '''
        Build and return the object describing the professional activity of
        the person called `name`.

        The object is created with `vocab.make_multitype_obj` from the classes in
        `classified_as` (defaulting to `[model.Activity]`). An `ident` keyword
        argument, when present, becomes the object's identifier.

        `century`, `date_range` and any extra keyword arguments are handed to
        `self.active_timespan`; if that yields a timespan it is attached to the
        activity, named with `verbatim_active_period` when that keyword is given.
        '''
        activity_types = classified_as or [model.Activity]
        ctor_args = {
            'ident': kwargs.get('ident', ''),
            'label': f'Professional activity of {name}',
        }
        activity = vocab.make_multitype_obj(*activity_types, **ctor_args)

        span = self.active_timespan(century=century,
                                    date_range=date_range,
                                    **kwargs)
        if not span:
            return activity

        if 'verbatim_active_period' in kwargs:
            verbatim = kwargs['verbatim_active_period']
            span.identified_by = model.Name(ident='', content=verbatim)
        activity.timespan = span
        return activity
示例#6
0
    def make_place(self, data: dict, base_uri=None):
        '''
        Given a dictionary representing data about a place, construct a model.Place
        object, assign it as the crom data in the dictionary, and return the dictionary.

        The dictionary keys used to construct the place object are:

        - name
        - type (one of: 'City', 'State', 'Province', or 'Country'; matched case-insensitively)
        - part_of (a recursive place dictionary)
        - uri (optional; used as the place identifier when present)

        When `data` has no `uri`, one is created with `self.make_proj_uri` if the
        place label is in `self.unique_locations`, or otherwise by appending the
        URL-quoted label to `base_uri`. A place with neither a `uri` nor a label
        is created without an explicit identifier.

        Returns `None` when `data` is `None`. A warning is issued for a place
        without a name.
        '''
        # Nothing to model; bail out before touching any instance state.
        if data is None:
            return None

        unique_locations = self.unique_locations
        TYPES = {
            'city': vocab.instances['city'],
            'province': vocab.instances['province'],
            'state': vocab.instances['province'],
            'country': vocab.instances['nation'],
        }

        type_name = data.get('type', 'place').lower()

        name = data.get('name')
        label = name
        parent_data = data.get('part_of')

        place_type = TYPES.get(type_name)
        parent = None
        if parent_data:
            # Model the enclosing place first so its label can qualify this one.
            parent_data = self.make_place(parent_data, base_uri=base_uri)
            parent = get_crom_object(parent_data)
            if label:
                label = f'{label}, {parent._label}'

        placeargs = {}
        if label:
            placeargs['label'] = label
        if data.get('uri'):
            placeargs['ident'] = data['uri']
        elif label in unique_locations:
            data['uri'] = self.make_proj_uri('PLACE', label)
            placeargs['ident'] = data['uri']
        elif base_uri and label is not None:
            # urllib.parse.quote() raises TypeError for None, so an unnamed place
            # cannot derive a URI from base_uri; it falls through to the
            # missing-name warning below instead of crashing here.
            data['uri'] = base_uri + urllib.parse.quote(label)
            placeargs['ident'] = data['uri']

        p = model.Place(**placeargs)
        if place_type:
            p.classified_as = place_type
        if name:
            p.identified_by = model.Name(ident='', content=name)
        else:
            warnings.warn(f'Place with missing name on {p.id}')
        if parent:
            p.part_of = parent
            data['part_of'] = parent_data
        return add_crom_data(data=data, what=p)
示例#7
0
文件: objects.py 项目: kasei/pipeline
    def populate_destruction_events(self,
                                    data: dict,
                                    note,
                                    *,
                                    type_map,
                                    location=None):
        '''
        Model the destruction of the object in `data` when `note` mentions one.

        `note` is searched for text of the form "destroyed [by|during <method>]
        [in <year>]". On a match, a `model.Destruction` is attached to the object
        via `destroyed_by`, carrying the note, an optional timespan for the year,
        an optional causing event (when the lower-cased method is a key of
        `type_map`), and an optional place parsed from `location`.

        Causing events are appended to `data['_events']` and places to
        `data['_locations']`. Nothing happens when the note does not match.
        '''
        destruction_types_map = type_map
        hmo = get_crom_object(data)
        title = data.get('title')
        short_title = truncate_with_ellipsis(title, 100) or title

        # Group 1: the method word following "by"/"during"; group 2: a 4-digit year.
        r = re.compile(
            r'[Dd]estroyed(?: (?:by|during) (\w+))?(?: in (\d{4})[.]?)?')
        m = r.search(note)
        if m:
            method = m.group(1)
            year = m.group(2)
            # The destruction URI is just the object URI with a suffix. When URIs are
            # reconciled during prev/post sale rewriting, this will allow us to also reconcile
            # the URIs for the destructions (of which there should only be one per object)
            dest_uri = hmo.id + '-Destruction'

            d = model.Destruction(ident=dest_uri,
                                  label=f'Destruction of “{short_title}”')
            d.referred_to_by = vocab.Note(ident='', content=note)
            if year is not None:
                begin, end = date_cleaner(year)
                ts = timespan_from_outer_bounds(begin, end)
                ts.identified_by = model.Name(ident='', content=year)
                d.timespan = ts

            if method:
                # Best effort: an unknown method (KeyError) or an AttributeError
                # silently skips the causing event. Note that a missing
                # data['_events'] key is suppressed here as well.
                with suppress(KeyError, AttributeError):
                    type_name = destruction_types_map[method.lower()]
                    otype = vocab.instances[type_name]
                    event = model.Event(
                        label=
                        f'{method.capitalize()} event causing the destruction of “{short_title}”'
                    )
                    event.classified_as = otype
                    d.caused_by = event
                    data['_events'].append(add_crom_data(data={}, what=event))

            if location:
                current = parse_location_name(
                    location, uri_base=self.helper.uid_tag_prefix)
                # The place URI used for destruction events is based on the object URI with
                # a suffix. When URIs are reconciled during prev/post sale rewriting, this
                # will allow us to also reconcile the URIs for the places of destruction
                # (of which there should only be one hierarchy per object)
                base_uri = hmo.id + '-Destruction-Place,'
                place_data = self.helper.make_place(current, base_uri=base_uri)
                place = get_crom_object(place_data)
                if place:
                    data['_locations'].append(place_data)
                    d.took_place_at = place

            hmo.destroyed_by = d
示例#8
0
    def set_properties(self, data, thing):
        '''
        Copy label, events, nationalities and formation/dissolution details
        from `data` onto `thing`, after the parent class has set its own.

        `formation` / `dissolution` hold the verbatim date text, while the
        optional `formation_clean` / `dissolution_clean` entries hold a
        (begin, end) pair of datetime-like values used for the bounds.
        '''
        super().set_properties(data, thing)
        with suppress(KeyError):
            thing._label = str(data['label'])

        for activity in data.get('events', []):
            thing.carried_out = activity

        for nationality in data.get('nationality', []):
            thing.classified_as = nationality

        def build_timespan(verbatim, bounds):
            # Timespan named by the verbatim text, bounded by whichever of the
            # cleaned (begin, end) values are present.
            span = model.TimeSpan(ident='')
            if bounds:
                if bounds[0]:
                    span.begin_of_the_begin = bounds[0].strftime(
                        "%Y-%m-%dT%H:%M:%SZ")
                if bounds[1]:
                    span.end_of_the_end = bounds[1].strftime(
                        "%Y-%m-%dT%H:%M:%SZ")
            span._label = verbatim
            span.identified_by = model.Name(ident='', content=verbatim)
            return span

        formation_text = data.get('formation')
        if formation_text:
            formation = model.Formation()
            formation.timespan = build_timespan(formation_text,
                                                data.get('formation_clean'))
            formation._label = "Formation of %s" % thing._label
            thing.formed_by = formation

        dissolution_text = data.get('dissolution')
        if dissolution_text:
            dissolution = model.Dissolution()
            dissolution.timespan = build_timespan(
                dissolution_text, data.get('dissolution_clean'))
            dissolution._label = "Dissolution of %s" % thing._label
            thing.dissolved_by = dissolution
示例#9
0
 def nation(self, name, ident):
     '''
     Return a `vocab.Place` for the country `name`, classified as a nation and
     identified by a `model.Name` carrying `name`.

     The place URI is a fixed tag prefix followed by `name`.
     '''
     place = vocab.Place(
         ident=
         'tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-'
         + name,
         label=name)
     # NOTE(review): this object is built from `ident` but never used or returned,
     # so `ident` has no effect on the result -- confirm whether it was meant to be
     # linked to `place` (or can be dropped).
     nation = model.Place(ident=ident)
     place.classified_as = vocab.instances['nation']
     place.identified_by = model.Name(ident='', content=name)
     return place
示例#10
0
    def set_properties(self, data, thing):
        '''
        Populate `thing` from `data`, after the parent class has set its own
        properties.

        Keys read from `data`:

        - ulan: numeric ULAN id, turned into an `exact_match` vocab.getty.edu URI
        - name: either a string, or a list/tuple of the name followed by extra
          values passed on to `self.set_lo_properties`
        - exact_match: iterable of values assigned to `thing.exact_match`
        - sojourns: iterable of dicts (`label`, `type`, `timespan`, `place`)
          modeled as activities carried out by `thing`
        - places: iterable of `model.Place` objects, dicts, or labels, each
          assigned to `thing.residence`
        '''
        super().set_properties(data, thing)
        # int(None) raises TypeError and a non-numeric string raises ValueError, so a
        # missing or malformed ULAN is silently ignored.
        with suppress(ValueError, TypeError):
            ulan = int(data.get('ulan'))
            if ulan:
                thing.exact_match = model.BaseResource(
                    ident=f'http://vocab.getty.edu/ulan/{ulan}')

        if 'name' in data:
            title_type = model.Type(
                ident='http://vocab.getty.edu/aat/300417193', label='Title')
            name = data['name']
            if name:
                if isinstance(name, str):
                    set_la_name(thing, name, title_type, set_label=True)
                elif isinstance(name, (list, tuple)):
                    # First element is the name text; the rest are forwarded as-is.
                    value, *properties = name
                    n = model.Name(ident='', content=value)
                    n.classified_as = title_type
                    self.set_lo_properties(n, *properties)
                    thing.identified_by = n

        for uri in data.get('exact_match', []):
            thing.exact_match = uri

        for sdata in data.get('sojourns', []):
            label = sdata.get('label', 'Sojourn activity')
            # `type` is called like a class to build the activity.
            stype = sdata.get('type', model.Activity)
            act = stype(ident='', label=label)
            ts = get_crom_object(sdata.get('timespan'))
            place = get_crom_object(sdata.get('place'))
            act.timespan = ts
            act.took_place_at = place
            thing.carried_out = act
            self.set_referred_to_by(sdata, act)

        # Locations are names of residence places (P74 -> E53)
        # XXX FIXME: Places are their own model
        if 'places' in data:
            for p in data['places']:
                if isinstance(p, model.Place):
                    pl = p
                elif isinstance(p, dict):
                    pl = get_crom_object(p)
                else:
                    # Anything else is used as the label of a new place.
                    pl = model.Place(ident='', label=p)
                #pl._label = p['label']
                #nm = model.Name()
                #nm.content = p['label']
                #pl.identified_by = nm
                #for s in p['sources']:
                #		l = model.LinguisticObject(ident="urn:uuid:%s" % s[1])
                # l._label = _row_label(s[2], s[3], s[4])
                #	pl.referred_to_by = l
                thing.residence = pl
示例#11
0
def timespan_after(before):
    '''
    Return a TimeSpan that begins when the `before` timespan ends.

    When `before` has a `_label`, the new timespan is labelled and named
    "After <label>". Returns `None` if `before` has no `end_of_the_end`
    attribute (including when `before` is `None`).
    '''
    ts = model.TimeSpan(ident='')
    try:
        ts.begin_of_the_begin = before.end_of_the_end
    except AttributeError:
        return None

    # A missing `_label` on `before` only means the result stays unlabelled.
    with suppress(AttributeError):
        label = f'After {before._label}'
        # The name belongs on the timespan. Assigning it to the label string
        # raised an AttributeError that was silently suppressed, which also
        # skipped setting `_label`.
        ts.identified_by = model.Name(ident='', content=label)
        ts._label = label
    return ts
示例#12
0
def timespan_before(after):
    '''
    Return a TimeSpan that ends when the `after` timespan begins.

    When `after` has a `_label`, the new timespan is labelled and named
    "Before <label>". Returns `None` if `after` has no `begin_of_the_begin`
    attribute (including when `after` is `None`).
    '''
    ts = model.TimeSpan(ident='')
    try:
        ts.end_of_the_end = after.begin_of_the_begin
    except AttributeError:
        return None

    # A missing `_label` on `after` only means the result stays unlabelled.
    with suppress(AttributeError):
        label = f'Before {after._label}'
        # The name belongs on the timespan. Assigning it to the label string
        # raised an AttributeError that was silently suppressed, which also
        # skipped setting `_label`.
        ts.identified_by = model.Name(ident='', content=label)
        ts._label = label
    return ts
示例#13
0
    def populate_destruction_events(self,
                                    data: dict,
                                    note,
                                    *,
                                    type_map,
                                    location=None):
        '''
        Attach a destruction event to the object in `data` if `note` reports one.

        The note must contain "destroyed", optionally followed by
        "by|during <method>" and "in <year>". The year becomes the timespan of
        the destruction; a method found in `type_map` adds a causing event (also
        appended to `data['_events']`); `location` adds the place of destruction
        (also appended to `data['_locations']`).
        '''
        hmo = get_crom_object(data)
        title = data.get('title')
        short_title = truncate_with_ellipsis(title, 100) or title

        pattern = re.compile(
            r'[Dd]estroyed(?: (?:by|during) (\w+))?(?: in (\d{4})[.]?)?')
        found = pattern.search(note)
        if not found:
            return

        method, year = found.group(1), found.group(2)
        destruction = model.Destruction(
            ident=hmo.id + '-Destr',
            label=f'Destruction of “{short_title}”')
        destruction.referred_to_by = vocab.Note(ident='', content=note)

        if year is not None:
            begin, end = date_cleaner(year)
            span = timespan_from_outer_bounds(begin, end)
            span.identified_by = model.Name(ident='', content=year)
            destruction.timespan = span

        if method:
            # Unknown methods (and any AttributeError) skip the causing event.
            with suppress(KeyError, AttributeError):
                type_name = type_map[method.lower()]
                cause_type = vocab.instances[type_name]
                cause_label = f'{method.capitalize()} event causing the destruction of “{short_title}”'
                cause = model.Event(label=cause_label)
                cause.classified_as = cause_type
                destruction.caused_by = cause
                data['_events'].append(add_crom_data(data={}, what=cause))

        if location:
            parsed = parse_location_name(
                location, uri_base=self.helper.uid_tag_prefix)
            place_data = self.helper.make_place(parsed,
                                                base_uri=hmo.id + '-Place,')
            place = get_crom_object(place_data)
            if place:
                data['_locations'].append(place_data)
                destruction.took_place_at = place

        hmo.destroyed_by = destruction
示例#14
0
文件: events.py 项目: kasei/pipeline
    def __call__(self, data: dict):
        '''
        Add modeling for an auction event based on properties of the supplied `data` dict.

        The event is obtained from `self.helper.sale_event_for_catalog_number`
        using the catalog number and sale type (`non_auction_flag`, defaulting
        to 'Auction'). `data` is updated in place with `uid`, `uri`, the crom
        event object and `_record` (an alias of `_catalog`), and is returned.
        '''
        cno = data['catalog_number']
        sale_type = data.get('non_auction_flag', 'Auction')
        auction, uid, uri = self.helper.sale_event_for_catalog_number(
            cno, sale_type)
        auction.identified_by = model.Name(ident='', content=auction._label)
        data['uid'] = uid
        data['uri'] = uri
        add_crom_data(data=data, what=auction)

        # The catalog entry doubles as the record for this event.
        data['_record'] = data['_catalog']
        return data
示例#15
0
	def test_boundary_setter(self):
		'''Embedded properties of an exact_match are only serialized when boundaries are off.'''
		vocab.add_linked_art_boundary_check()
		p = model.Person()
		p2 = model.Person()
		n = model.Name()
		n.content = "Test"
		p2.identified_by = n
		p.exact_match = p2
		# With boundaries enabled, the name of p2 must not appear in the JSON of p.
		factory.linked_art_boundaries = True
		js = factory.toJSON(p)
		self.assertNotIn('identified_by', js['exact_match'][0])
		# With boundaries disabled, the embedded name is serialized.
		factory.linked_art_boundaries = False
		js = factory.toJSON(p)
		self.assertIn('identified_by', js['exact_match'][0])
示例#16
0
    def test_production_mode(self):
        '''Serialization must succeed for an invalid model when validation is off.'''
        # model.factory.production_mode() is not called directly: the cached
        # hierarchy can't be unset afterwards, which makes the hierarchy test
        # fail. The individual validation switches are turned off instead.
        factory = model.factory
        for switch in ('validate_profile', 'validate_properties',
                       'validate_range', 'validate_multiplicity'):
            setattr(factory, switch, False)

        person = model.Person()
        person.identified_by = model.Name(value="abc")
        person.part = model.HumanMadeObject()
        # Only the absence of an exception is being checked here.
        factory.toJSON(person)

        factory.production_mode(state=False)
示例#17
0
    def model_sojourn(self, data, loc):
        '''
        Build and return a dict describing a sojourn at the location `loc`.

        The result always has `type` (`vocab.Establishment` when
        `data['corporate_body']` is truthy, otherwise `vocab.Residing`) and
        `referred_to_by` (notes from `address_note` / `location_note`). It gains
        `timespan` when `address_date` can be cleaned into a date range, and
        `place` when a location and/or address is present; the place data is
        also appended to `data['_places']`.
        '''
        base_uri = self.helper.make_proj_uri('PLACE', '')
        is_corporate = data.get('corporate_body', False)
        notes = []
        sdata = {
            'type': vocab.Establishment if is_corporate else vocab.Residing,
            'referred_to_by': notes,
        }

        verbatim_date = loc.get('address_date')
        date_range = date_cleaner(verbatim_date) if verbatim_date else None
        if date_range:
            begin, end = date_range
            span = timespan_from_outer_bounds(*date_range)
            span.identified_by = model.Name(ident='', content=verbatim_date)
            span_data = {
                'address_date': verbatim_date,
                'begin': begin,
                'end': end
            }
            sdata['timespan'] = add_crom_data(span_data, span)

        # The address, when given, is modeled as a place within the location.
        location_name = loc.get('location')
        place_desc = None
        if location_name:
            place_desc = parse_location_name(
                location_name, uri_base=self.helper.proj_prefix)
        address = loc.get('address')
        if address:
            place_desc = {
                'name': address,
                'part_of': place_desc,
                'type': 'address',
            }

        for note_key in ('address_note', 'location_note'):
            text = loc.get(note_key)
            if text:
                notes.append(vocab.Note(ident='', content=text))

        if place_desc:
            place_data = self.helper.make_place(place_desc, base_uri=base_uri)
            data['_places'].append(place_data)
            sdata['place'] = place_data
        return sdata
示例#18
0
文件: extract.py 项目: kasei/crom
def extract_physical_dimensions(dimstr, **kwargs):
    '''
    Yield a dimension object for each dimension parsed out of `dimstr`.

    `dimstr` is parsed by `dimensions_cleaner` (which receives `kwargs`), and
    each result is normalized. Heights and widths get their specific vocab
    class; everything else becomes a `vocab.PhysicalDimension`. Each yielded
    object carries the value, a name with the normalized label, and a unit
    when the dimension's unit is a key of `vocab.instances`.
    '''
    dimensions = dimensions_cleaner(dimstr, **kwargs)
    if not dimensions:
        return

    for raw in dimensions:
        normalized = normalized_dimension_object(raw, source=dimstr)
        if not normalized:
            continue
        dimension, label = normalized

        if dimension.which == 'height':
            dim_cls = vocab.Height
        elif dimension.which == 'width':
            dim_cls = vocab.Width
        else:
            dim_cls = vocab.PhysicalDimension

        dim = dim_cls(ident='')
        dim.value = dimension.value
        dim.identified_by = model.Name(ident='', content=label)
        unit = vocab.instances.get(dimension.unit)
        if unit:
            dim.unit = unit
        yield dim
示例#19
0
    def test_not_multiple_instance(self):
        '''Check the "error", "drop" and "allow" policies for repeated instances.'''
        factory = model.factory
        person = model.Person()
        name = model.Name(content="Test")
        person.identified_by = name

        # "error": assigning the same instance again is rejected and not stored.
        factory.multiple_instances_per_property = "error"
        with self.assertRaises(model.DataError):
            person.identified_by = name
        self.assertEqual(person.identified_by, [name])

        # "drop": the duplicate is stored but serialized only once.
        factory.multiple_instances_per_property = "drop"
        person.identified_by = name
        self.assertEqual(person.identified_by, [name, name])
        serialized = factory.toJSON(person)
        self.assertEqual(len(serialized['identified_by']), 1)

        # "allow": both copies are serialized.
        factory.multiple_instances_per_property = "allow"
        serialized = factory.toJSON(person)
        self.assertEqual(len(serialized['identified_by']), 2)
示例#20
0
    def __call__(self, data: dict, location_codes, unique_catalogs):
        '''
        Add information about the ownership of a physical copy of an auction catalog.

        When `data['owner_code']` is a key of `location_codes`, an owning
        `model.Group` is created, stored (with its data dict) in
        `data['_owner']`, and set as the current owner of the catalog object in
        `data`. Any `KeyError` raised while doing so abandons the owner
        modeling without failing.

        In every case the URI of this copy is added to `unique_catalogs` under
        a key shared by all copies held by the same owner. Returns `data`.
        '''
        # Add the URI of this physical catalog to `unique_catalogs`. This data will be used
        # later to figure out which catalogs can be uniquely identified by a catalog number
        # and owner code (e.g. for owners who do not have multiple copies of a catalog).
        cno = data['catalog_number']
        owner_code = data['owner_code']
        copy_number = data.get('copy_number', '')
        entry_record = get_crom_object(data.get('_catalog'))
        with suppress(KeyError):
            owner_name = location_codes[owner_code]
            owner_uri = self.helper.make_proj_uri('ORGANIZATION',
                                                  'LOCATION-CODE', owner_code)
            data['_owner'] = {
                'label': owner_name,
                'uri': owner_uri,
                'referred_to_by': [entry_record],
                'identifiers': [
                    model.Name(ident='', content=owner_name),
                    model.Identifier(ident='', content=str(owner_code))
                ],
            }
            owner = model.Group(ident=owner_uri)
            owner.referred_to_by = entry_record
            add_crom_data(data=data['_owner'], what=owner)
            if not owner_code:
                warnings.warn(f'Setting empty identifier on {owner.id}')
            catalog = get_crom_object(data)
            catalog.current_owner = owner

        # None as the copy number because we want a key that will stand in for
        # all the copies belonging to a single owner.
        owner_key = self.helper.physical_catalog_uri(cno, owner_code, None)
        copy_uri = self.helper.physical_catalog_uri(cno, owner_code,
                                                    copy_number)
        unique_catalogs[owner_key].add(copy_uri)
        return data
示例#21
0
    def set_lot_objects(self, lot, cno, lno, auction_of_lot_uri, data,
                        sale_type):
        '''
        Associate the set of objects with the auction lot.

        A set object (an auction-lot set for 'Auction' sales, a collection set
        otherwise) is created, given any estimated, starting and asking prices
        found in `data` as dimensions, linked from `lot`, and stored in
        `data['_lot_object_set']`.
        '''
        shared_lot_number = self.helper.shared_lot_number_from_lno(lno)
        if sale_type == 'Auction':
            set_cls = vocab.AuctionLotSet
        else:
            set_cls = vocab.CollectionSet

        label = f'Object Set for Lot {cno} {shared_lot_number}'
        object_set = set_cls(ident=f'{auction_of_lot_uri}-Set', label=label)
        object_set.identified_by = model.Name(ident='', content=label)

        # Every price that is present is recorded as a dimension of the set.
        for price_key in ('estimated_price', 'start_price', 'ask_price'):
            price = data.get(price_key)
            if price:
                self.set_possible_attribute(object_set, 'dimension', price)

        lot.used_specific_object = object_set
        data['_lot_object_set'] = add_crom_data(data={}, what=object_set)
示例#22
0
def set_la_name(thing, value, title_type=None, set_label=False):
    '''
    Give `thing` a `model.Name` built from `value` and return that name.

    `value` is either the name text or a `(text, language)` tuple. With
    `set_label`, the text also becomes `thing._label` (warning if it is empty).
    `title_type` classifies the name: a `model.Type` is assigned directly, any
    other value goes through `vocab.add_classification`.

    Returns `None` without touching `thing` when `value` is `None`.
    '''
    if value is None:
        return None

    text, lang = value if isinstance(value, tuple) else (value, None)

    if set_label:
        if not text:
            warnings.warn(f'Setting empty label on {thing.id}')
        thing._label = text

    name = model.Name(ident='', content=text)
    if title_type is not None:
        if not isinstance(title_type, model.Type):
            vocab.add_classification(name, title_type)
        else:
            name.classified_as = title_type

    thing.identified_by = name
    if lang is not None:
        name.language = lang
    return name
示例#23
0
    def set_lot_date(lot, auction_data, event_dates):
        '''
        Associate a timespan with the auction lot.

        The date is read from the `lot_sale_` fields of `auction_data`; nothing
        is set when there is none. The timespan is named with the date and
        assigned to `lot.timespan`.
        '''
        date = implode_date(auction_data, 'lot_sale_')
        if not date:
            return

        begin = implode_date(auction_data, 'lot_sale_', clamp='begin')
        end = implode_date(auction_data, 'lot_sale_', clamp='eoe')

        if auction_data.get('lot_sale_mod'):
            # if the lot sale date is marked as uncertain:
            #   - use the event end date as the lot sale's end_of_the_end
            #   - if the event doesn't have a known end date, assert no end_of_the_end for the lot sale
            end = event_dates[1] if event_dates and event_dates[1] else None

        span = timespan_from_outer_bounds(begin, end)
        span.identified_by = model.Name(ident='', content=date)
        lot.timespan = span
示例#24
0
    def _populate_object_catalog_record(self, data: dict, parent, lot, cno,
                                        rec_num):
        '''
        Create the catalog record text describing the sale of the object in
        `data`, store it in `data['_record']`, and return the record object.

        The record is part of the sale catalog `cno`, is about the object, and
        is identified by a name and by the identifier that
        `self.helper.gri_number_id` returns for `parent['persistent_puid']`.
        A `transaction` value in `parent` is attached as a property status
        statement.
        '''
        hmo = get_crom_object(data)

        catalog = vocab.AuctionCatalogText(
            ident=self.helper.make_proj_uri('CATALOG', cno),
            label=f'Sale Catalog {cno}')

        record_uri = self.helper.make_proj_uri('CATALOG', cno, 'RECORD',
                                               rec_num)
        lot_object_id = parent['lot_object_id']
        puid_id = self.helper.gri_number_id(parent.get('persistent_puid'))

        record_label = f'Sale recorded in catalog: {lot_object_id} (record number {rec_num})'
        record = vocab.ParagraphText(ident=record_uri, label=record_label)
        record_name = model.Name(ident='',
                                 content=f'Record of sale {lot_object_id}')
        record_data = {
            'uri': record_uri,
            'identifiers': [record_name, puid_id],
        }
        record.part_of = catalog

        transaction = parent.get('transaction')
        if transaction:
            record.referred_to_by = vocab.PropertyStatusStatement(
                ident='',
                label='Transaction type for sales record',
                content=transaction)
        record.about = hmo

        data['_record'] = add_crom_data(data=record_data, what=record)
        return record
示例#25
0
def make_ymd_timespan(data: dict, start_prefix="", end_prefix="", label=""):
    '''
    Build a `model.TimeSpan` from year/month/day entries in `data`.

    The start is read from the `<start_prefix>year|month|day` keys and the end
    from the `<end_prefix>year|month|day` keys. Without an explicit `label`,
    one is derived from the start date, extended with " to <end date>" when the
    two prefixes name different keys. A warning is issued if the resulting
    label is empty.
    '''
    parts = ('year', 'month', 'day')
    start_keys = [f'{start_prefix}{part}' for part in parts]
    end_keys = [f'{end_prefix}{part}' for part in parts]

    span = model.TimeSpan(ident='')
    if not label:
        label = ymd_to_label(*(data[key] for key in start_keys))
        # Distinct key names mean the end date is stored separately.
        if start_keys[0] != end_keys[0]:
            end_label = ymd_to_label(*(data[key] for key in end_keys))
            label = f'{label} to {end_label}'
    span._label = label
    if not label:
        warnings.warn(f'Setting empty name on {span.id}')
    span.identified_by = model.Name(ident='', content=label)

    span.begin_of_the_begin = ymd_to_datetime(
        *(data[key] for key in start_keys))
    span.end_of_the_end = ymd_to_datetime(
        *(data[key] for key in end_keys), which="end")
    return span
示例#26
0
文件: extract.py 项目: kasei/crom
def extract_monetary_amount(data,
                            add_citations=False,
                            currency_mapping=CURRENCY_MAPPING,
                            source_mapping=None,
                            truncate_label_digits=2):
    '''
    Return a `MonetaryAmount`, `EstimatedPrice`, `StartingPrice`, or
    `AskingPrice` object based on properties of the supplied `data` dict.
    If no price key is present, or neither an amount nor a currency value is
    found, returns `None`.

    The kind of object is chosen by the first group of keys present in `data`,
    checked in this order. Within each list, the first key present wins.

    `MonetaryAmount` (chosen if `price`, `price_amount`, or `amount` is present):
        - amount: `price_amount`, `price`, or `amount`
        - currency: `currency`, `price_currency`, or `price_curr`
        - note: `price_note`, `price_desc`, or `note`
        - bibliographic statement: `price_citation` or `citation`

    `EstimatedPrice` (chosen if `est_price` or `est_price_amount` is present):
        - amount: `est_price_amount` or `est_price`
        - currency: `est_price_currency`, `est_price_curr`, or `currency`
        - note: `est_price_note`, `est_price_desc`, or `note`
        - bibliographic statement: `est_price_citation` or `citation`

    `StartingPrice` (chosen if `start_price` or `start_price_amount` is present):
        - amount: `start_price_amount` or `start_price`
        - currency: `start_price_currency`, `start_price_curr`, or `currency`
        - note: `start_price_note`, `start_price_desc`, or `note`
        - bibliographic statement: `start_price_citation` or `citation`

    `AskingPrice` (chosen if `ask_price` or `ask_price_amount` is present):
        - amount: `ask_price_amount` or `ask_price`
        - currency: `ask_price_currency`, `ask_price_curr`, or `currency`
        - note: `ask_price_note`, `ask_price_desc`, or `note`
        - bibliographic statement: `ask_price_citation` or `citation`

    Parameters:
        - data: dict of price fields (see above).
        - add_citations: when true, a found citation is attached as a
          `BibliographyStatement`.
        - currency_mapping: mapping from lower-cased currency strings to either
          a currency resource or a key of `vocab.instances`.
        - source_mapping: accepted for interface compatibility; not used here.
        - truncate_label_digits: number of decimal digits shown in the label.

    The amount may be a string (uncertainty markers `?`/`[?]` and thousands
    separators are stripped before parsing) or a number. An amount that cannot
    be parsed as a number is kept as the object's label and as a `Name`.
    '''
    if 'price' in data or 'price_amount' in data or 'amount' in data:
        amnt = model.MonetaryAmount(ident='')
        price_amount = data.get('price_amount',
                                data.get('price', data.get('amount')))
        price_currency = data.get(
            'currency', data.get('price_currency', data.get('price_curr')))
        note = data.get('price_note', data.get('price_desc', data.get('note')))
        cite = data.get('price_citation', data.get('citation'))
    elif 'est_price' in data or 'est_price_amount' in data:
        amnt = vocab.EstimatedPrice(ident='')
        price_amount = data.get('est_price_amount', data.get('est_price'))
        price_currency = data.get(
            'est_price_currency',
            data.get('est_price_curr', data.get('currency')))
        note = data.get('est_price_note',
                        data.get('est_price_desc', data.get('note')))
        cite = data.get('est_price_citation', data.get('citation'))
    elif 'start_price' in data or 'start_price_amount' in data:
        amnt = vocab.StartingPrice(ident='')
        price_amount = data.get('start_price_amount', data.get('start_price'))
        price_currency = data.get(
            'start_price_currency',
            data.get('start_price_curr', data.get('currency')))
        note = data.get('start_price_note',
                        data.get('start_price_desc', data.get('note')))
        cite = data.get('start_price_citation', data.get('citation'))
    elif 'ask_price' in data or 'ask_price_amount' in data:
        amnt = vocab.AskingPrice(ident='')
        price_amount = data.get('ask_price_amount', data.get('ask_price'))
        price_currency = data.get(
            'ask_price_currency',
            data.get('ask_price_curr', data.get('currency')))
        note = data.get('ask_price_note',
                        data.get('ask_price_desc', data.get('note')))
        cite = data.get('ask_price_citation', data.get('citation'))
    else:
        return None

    if not (price_amount or price_currency):
        return None

    if cite and add_citations:
        amnt.referred_to_by = vocab.BibliographyStatement(ident='',
                                                          content=cite)
    if note:
        amnt.referred_to_by = vocab.Note(ident='', content=note)

    price_amount_label = price_amount
    if price_amount:
        # Amounts normally arrive as strings, but a numeric value (e.g. one
        # already parsed by the data reader) must not crash the string cleanup.
        if isinstance(price_amount, str):
            amount_text = price_amount
        else:
            amount_text = str(price_amount)
        try:
            # Drop uncertainty markers before parsing.
            value = amount_text.replace('[?]', '').replace('?', '').strip()
            # Only treat commas as thousands separators when one is followed
            # by three digits.
            if re.search(r',\d\d\d', value):
                value = value.replace(',', '')
            value = float(value)

            label_fmt = '{:,.%df}' % truncate_label_digits
            price_amount_label = label_fmt.format(value)

            amnt.value = value
        except ValueError:
            # Not a numeric amount: keep the original text as label and name.
            amnt._label = amount_text
            amnt.identified_by = model.Name(ident='', content=amount_text)

    if price_currency:
        price_currency_key = price_currency
        try:
            price_currency_key = currency_mapping[price_currency_key.lower()]
        except KeyError:
            # Unmapped currency strings are looked up as-is below.
            pass
        if isinstance(price_currency_key, model.BaseResource):
            amnt.currency = price_currency_key
        elif price_currency_key in vocab.instances:
            amnt.currency = vocab.instances[price_currency_key]
        else:
            warnings.warn('*** No currency instance defined for %s' %
                          (price_currency_key, ))

    if price_amount_label and price_currency:
        amnt._label = '%s %s' % (price_amount_label, price_currency)
    elif price_amount:
        amnt._label = '%s' % (price_amount, )
    return amnt
示例#27
0
    def _populate_object_present_location(self, data: dict, now_key,
                                          destruction_types_map):
        '''
        Model the present location (and current owner) of the object in `data`.

        Reads `data['present_location']`, a dict with optional keys `geog`
        (place string), `note`, `inst` (institution name), `insi` (institution
        ULAN number), and `acc` (accession number). Nothing is done when the
        location or its `geog` string is missing.

        If the place string or the note contains "destroyed ", the information
        is handed to `populate_destruction_events` instead. Otherwise the place
        is parsed and modeled, an owning organization is created (anonymous,
        keyed by `now_key`, when no institution is named) with that place as
        its residence, and an accession number, if any, is attached to the
        object as assigned by the owner.

        Side effects on `data`: the place is appended to `data['_locations']`
        and the owner is stored in `data['_final_org']`.
        '''
        hmo = get_crom_object(data)
        present = data.get('present_location')
        if not present:
            return

        geog = present.get('geog')
        note = present.get('note')
        if not geog:
            # there is no present location place string
            return

        if 'destroyed ' in geog.lower():
            self.populate_destruction_events(data,
                                             geog,
                                             type_map=destruction_types_map)
            return

        if isinstance(note, str) and 'destroyed ' in note.lower():
            # the object was destroyed, so any "present location" data is actually
            # an indication of the location of destruction.
            self.populate_destruction_events(data,
                                             note,
                                             type_map=destruction_types_map,
                                             location=geog)
            return

        # TODO: if `parse_location_name` fails, still preserve the location string somehow
        parsed_place = parse_location_name(
            geog, uri_base=self.helper.uid_tag_prefix)

        institution = present.get('inst')
        if institution:
            org_data = {
                'label': f'{institution} ({geog})',
                'identifiers': [model.Name(ident='', content=institution)],
            }
            # A missing or non-numeric ULAN value simply means "no ULAN".
            try:
                ulan = int(present.get('insi'))
            except (ValueError, TypeError):
                ulan = None
            if ulan:
                org_data['ulan'] = ulan
                org_data['uri'] = self.helper.make_proj_uri(
                    'ORG', 'ULAN', ulan)
            else:
                org_data['uri'] = self.helper.make_proj_uri(
                    'ORG', 'NAME', institution, 'PLACE', geog)
        else:
            org_data = {
                'label': '(Anonymous organization)',
                'uri': self.helper.make_proj_uri('ORG', 'CURR-OWN', *now_key),
            }

        if note:
            org_data['note'] = note

        place_data = self.helper.make_place(parsed_place,
                                            base_uri=hmo.id + '-Place,')
        place = get_crom_object(place_data)

        org_data = pipeline.linkedart.MakeLinkedArtOrganization()(org_data)
        owner = get_crom_object(org_data)

        accession = present.get('acc')
        if accession:
            acc_number = vocab.AccessionNumber(ident='', content=accession)
            hmo.identified_by = acc_number
            assignment = model.AttributeAssignment(ident='')
            assignment.carried_out_by = owner
            acc_number.assigned_by = assignment

        owner.residence = place
        data['_locations'].append(place_data)
        data['_final_org'] = org_data
示例#28
0
    def related_procurement(self,
                            hmo,
                            tx_label_args,
                            current_tx=None,
                            current_ts=None,
                            buyer=None,
                            seller=None,
                            previous=False,
                            ident=None,
                            make_label=None):
        '''
        Create a `vocab.ProvenanceEntry` for `hmo` that is temporally related to
        an existing procurement, and return it together with its acquisition as
        a `(provenance_entry, acquisition)` tuple.

        The entry contains an acquisition and a transfer of custody of `hmo`.
        `buyer` and `seller` (both optional) are set as the receiving and
        giving parties on both parts.

        If `previous` is `True`, the new entry occurs before `current_tx`, and
        if the timespan `current_ts` is given, the acquisition gets a timespan
        before it. If `previous` is `False`, the relationship is reversed
        (after `current_tx` / `current_ts`).

        `make_label`, if supplied, is called with `*tx_label_args` to generate
        the label for the provenance entry. Its arguments (generated in
        `handle_prev_post_owner`) are:

          * helper: the helper object for the current pipeline
          * sale_type: the sale type passed to `handle_prev_post_owner` (e.g. "Auction")
          * transaction: the transaction type being handled (e.g. "Sold")
          * rel: a string describing the relationship between this provenance entry and the object (e.g. "leading to the previous ownership of")
          * N trailing arguments that are the contents of the `lot_object_key` tuple passed to `handle_prev_post_owner`

        When `make_label` is omitted, the label is the comma-separated string
        form of the trailing arguments.
        '''
        if make_label is None:

            def make_label(helper, sale_type, transaction, rel, *args):
                # Default label: only the trailing key components are used.
                return ', '.join(str(part) for part in args)

        entry = vocab.ProvenanceEntry(ident=ident)
        entry_label = make_label(*tx_label_args)
        entry._label = entry_label
        entry.identified_by = model.Name(ident='', content=entry_label)

        if current_tx:
            if previous:
                relation = 'ends_before_the_start_of'
            else:
                relation = 'starts_after_the_end_of'
            setattr(entry, relation, current_tx)

        if previous:
            modifier = 'Previous'
        else:
            modifier = 'Subsequent'

        try:
            acquisition = model.Acquisition(
                ident='', label=f'{modifier} Acquisition of: “{hmo._label}”')
            custody = model.TransferOfCustody(
                ident='',
                label=f'{modifier} Transfer of Custody of: “{hmo._label}”')
        except AttributeError:
            # Fall back to labels that do not mention the object.
            acquisition = model.Acquisition(ident='',
                                            label=f'{modifier} Acquisition')
            custody = model.TransferOfCustody(
                ident='', label=f'{modifier} Transfer of Custody')

        acquisition.transferred_title_of = hmo
        custody.transferred_custody_of = hmo
        if buyer:
            acquisition.transferred_title_to = buyer
            custody.transferred_custody_to = buyer
        if seller:
            acquisition.transferred_title_from = seller
            custody.transferred_custody_from = seller

        entry.part = acquisition
        entry.part = custody

        if current_ts:
            acquisition.timespan = (timespan_before(current_ts)
                                    if previous else
                                    timespan_after(current_ts))
        return entry, acquisition
示例#29
0
    def set_properties(self, data, thing):
        '''
        Copy descriptive properties from the `data` dict onto the crom object
        `thing`, after the parent class has applied its own properties.

        Keys of `data` that are read (all optional):

        - label: set as the thing's name (and label) via `set_la_name`
        - created_by, used_for, about, classified_as, dimensions: iterables of
          values assigned directly to the property of the same name
          (`dimensions` is assigned to `dimension`)
        - translations: names recorded as translations of the `label` name
        - qualified_identifiers: `(content, identifier_class, notes)` triples
        - classifications: `model.Type` objects or `(code, label)` pairs
        - indexing: `(code, label)` tuples or ready-made objects
        - part_of, part, carried_by: data dicts whose crom objects are linked
          through the property of the same name
        '''
        super().set_properties(data, thing)

        # TODO: this whole title_type thing isn't right. most of the identifiers below aren't titles
        title_type = model.Type(ident='http://vocab.getty.edu/aat/300417193',
                                label='Title')
        primary_name = None
        if 'label' in data:
            primary_name = set_la_name(thing,
                                       data['label'],
                                       title_type,
                                       set_label=True)

        # Properties whose values are passed through unchanged.
        for prop in ('created_by', 'used_for', 'about', 'classified_as'):
            for value in data.get(prop, []):
                setattr(thing, prop, value)

        for translation in data.get('translations', []):
            translated = set_la_name(thing, translation, title_type)
            if primary_name is not None:
                translated.translation_of = primary_name

        for content, id_class, notes in data.get('qualified_identifiers', []):
            identifier = id_class(content=content)
            if not content:
                warnings.warn(f'Setting empty identifier on {thing.id}')
            thing.identified_by = identifier
            for note in notes:
                identifier.referred_to_by = note

        code_type = None  # TODO: is there a model.Type value for this sort of code?

        for entry in data.get('classifications', []):
            if isinstance(entry, model.Type):
                thing.about = entry
                continue

            # Otherwise the entry is a (code, label) pair to be modeled as a Type.
            cid, label = entry
            type_name = model.Name()
            type_name.classified_as = title_type
            type_name.content = label

            classification = model.Type(label=label)
            if not label:
                warnings.warn(f'Setting empty name on {classification.id}')
            classification.identified_by = type_name

            code = model.Identifier()
            code.classified_as = code_type
            if not cid:
                warnings.warn(f'Setting empty identifier on {code.id}')
            code.content = cid
            classification.identified_by = code
            thing.about = classification

        for entry in data.get('indexing', []):
            if not isinstance(entry, tuple):
                thing.about = entry
                continue

            # A (code, label) tuple is modeled as a Type, as for classifications.
            cid, label = entry
            type_name = model.Name()
            type_name.classified_as = title_type
            type_name.content = label

            indexing = model.Type(label=label)
            if not label:
                warnings.warn(f'Setting empty name on {indexing.id}')
            indexing.identified_by = type_name

            code = model.Identifier()
            code.classified_as = code_type
            code.content = cid
            if not cid:
                warnings.warn(f'Setting empty identifier on {code.id}')
            indexing.identified_by = code
            thing.about = indexing

        # Properties whose values are data dicts wrapping a crom object.
        for prop in ('part_of', 'part', 'carried_by'):
            for related_data in data.get(prop, []):
                setattr(thing, prop, get_crom_object(related_data))

        for dimension in data.get('dimensions', []):
            thing.dimension = dimension
示例#30
0
    def make_place(self, data: dict, base_uri=None):
        '''
        Given a dictionary representing data about a place, construct a model.Place object,
        assign it as the crom data in the dictionary, and return the dictionary.
        Returns `None` if `data` is `None`.

        The dictionary keys used to construct the place object are:

        - name
        - names (optional list of additional names)
        - type (one of: 'City', 'County', 'State', 'Province', 'Country', or
          'Sovereign', matched case-insensitively; other values leave the
          place unclassified)
        - part_of (a recursive place dictionary)
        - uri (optional; used as the place identifier when present)

        If no `uri` is given and `base_uri` is supplied, the identifier is
        `base_uri` followed by the URL-quoted label, and is also stored in
        `data['uri']`.

        If the name matches a known canonical location name (a casefolded key
        of `self.canonical_location_names`), the normal recursive handling of
        part_of data is bypassed, and the canonical name is used as both the
        name and the label of the place.

        If the name or label matches a static instance, that instance is used
        instead of creating a new place, and its `part_of` chain is threaded
        back into the data dictionaries.

        A place without a name is still modeled; a warning is issued for it.
        '''
        if data is None:
            return None

        canonical_location_names = self.canonical_location_names
        TYPES = {
            'city': vocab.instances['city'],
            'county': vocab.instances['county'],
            'province': vocab.instances['province'],
            'state': vocab.instances['province'],
            'country': vocab.instances['nation'],
            'sovereign': vocab.instances['sovereign'],
        }

        type_name = data.get('type', 'place').lower()

        name = data.get('name')
        si = self.static_instances

        names = data.get('names', [])
        label = name
        parent_data = data.get('part_of')

        place_type = TYPES.get(type_name)

        parent = None

        # A missing name must not crash here: it is reported with a warning
        # further down, once the place object exists.
        folded_name = name.casefold() if name is not None else None
        if folded_name is not None and folded_name in canonical_location_names:
            name = canonical_location_names.get(folded_name, name)
            label = name
        elif parent_data:
            parent_data = self.make_place(parent_data, base_uri=base_uri)
            parent = get_crom_object(parent_data)
            if label:
                label = f'{label}, {parent._label}'

        if si:
            # Look up by bare name first, then by the full label.
            p = None
            for key in (name, label):
                if key is None:
                    continue
                p = si.get_instance('Place', key)
                if p:
                    break

            if p:
                # this is a static instance. we need to re-thread the part_of relationship
                # in the data dictionary, because the serialization depends on the dictionary
                # data, not the properties of the modeled object
                add_crom_data(data=data, what=p)
                queue = [data]
                while queue:
                    place_data = queue.pop(0)
                    place = get_crom_object(place_data)
                    ancestors = getattr(place, 'part_of', []) or []
                    if ancestors:
                        for ancestor in ancestors:
                            if ancestor:
                                if 'part_of' not in place_data:
                                    ancestor_data = add_crom_data(
                                        data={}, what=ancestor)
                                    place_data['part_of'] = ancestor_data
                                else:
                                    ancestor_data = add_crom_data(
                                        data=place_data['part_of'],
                                        what=ancestor)
                                queue.append(ancestor_data)
                    elif 'part_of' in place_data:
                        ancestor_data = self.make_place(
                            place_data['part_of'], base_uri=base_uri)
                        queue.append(ancestor_data)
                return data

        placeargs = {}
        if label:
            placeargs['label'] = label

        if data.get('uri'):
            placeargs['ident'] = data['uri']
        elif base_uri and label is not None:
            # quote() rejects None, so a URI is only derived when a label
            # exists; otherwise the place is created without an explicit ident.
            data['uri'] = base_uri + urllib.parse.quote(label)
            placeargs['ident'] = data['uri']

        p = model.Place(**placeargs)
        if place_type:
            p.classified_as = place_type
        if name:
            p.identified_by = vocab.PrimaryName(ident='', content=name)
        else:
            warnings.warn(f'Place with missing name on {p.id}')
        for alt_name in names:
            if alt_name:
                p.identified_by = model.Name(ident='', content=alt_name)
        if parent:
            p.part_of = parent
            data['part_of'] = parent_data
        return add_crom_data(data=data, what=p)