Exemplo n.º 1
0
    def handle_places(self, data):
        '''
        Record the places, addresses and related statements found in
        `data['locations']`.

        For every entry of `data['locations']` (missing key means no entries):

        - `location`: parsed with `parse_location_name` and converted with
          `self.helper.make_place`; the result is appended to `data['places']`.
          `location_note` is added as a note and `location_date` as a
          "Residence in <location> (<date>)" statement.
        - `address`: wrapped in an identifier and appended to
          `data['contact_point']`. `address_note` is added as a note and
          `address_date` as an "Address at <address> (<date>)" statement.

        `data` is modified in place and nothing is returned. The lists
        `data['places']`, `data['contact_point']` and `data['referred_to_by']`
        must already exist when an entry needs them.
        '''
        base_uri = self.helper.make_proj_uri('PLACE', '')
        for loc in data.get('locations', []):
            location = loc.get('location')
            if location:
                current = parse_location_name(
                    location, uri_base=self.helper.proj_prefix)
                place_data = self.helper.make_place(current, base_uri=base_uri)
                data['places'].append(place_data)

                location_note = loc.get('location_note')
                if location_note:
                    data['referred_to_by'].append(
                        vocab.Note(ident='', content=location_note))

                location_date = loc.get('location_date')
                if location_date:
                    data['referred_to_by'].append(
                        vocab.BibliographyStatement(
                            ident='',
                            content=f'Residence in {location} ({location_date})'))

            address = loc.get('address')
            if address:
                contact = model.Identifier(ident='', content=address)
                contact_data = add_crom_data(data={}, what=contact)
                data['contact_point'].append(contact_data)

                address_note = loc.get('address_note')
                if address_note:
                    data['referred_to_by'].append(
                        vocab.Note(ident='', content=address_note))

                address_date = loc.get('address_date')
                if address_date:
                    # The statement describes the address itself; using the
                    # location name here produced "Address at None (...)"
                    # for entries that carry an address but no location.
                    data['referred_to_by'].append(
                        vocab.BibliographyStatement(
                            ident='',
                            content=f'Address at {address} ({address_date})'))
Exemplo n.º 2
0
    def handle_statements(self, data):
        '''
        Append statement objects for the free-text fields of `data` to
        `data['referred_to_by']`.

        - `text` is added as a biography statement.
        - `source` is added as a bibliography statement.
        - `project` is added as a source statement.
        - `medal_received` is split on `;`; every distinct, non-empty entry is
          added as a note, in order of first appearance.

        `data` is modified in place and nothing is returned.
        `data['referred_to_by']` must already be a list whenever one of the
        fields above has content.
        '''
        text_content = data.get('text')
        if text_content:
            data['referred_to_by'].append(
                vocab.BiographyStatement(ident='', content=text_content))

        source_content = data.get('source')
        if source_content:
            data['referred_to_by'].append(
                vocab.BibliographyStatement(ident='', content=source_content))

        project = data.get('project')
        if project:
            data['referred_to_by'].append(
                vocab.SourceStatement(ident='', content=project))

        # `or ''` tolerates an explicit None value. dict.fromkeys removes
        # duplicates while keeping first-seen order, so the emitted notes are
        # ordered deterministically (a set is not).
        raw_awards = data.get('medal_received') or ''
        awards = dict.fromkeys(part.strip() for part in raw_awards.split(';'))
        for award in awards:
            if not award:
                continue
            cite = vocab.Note(
                ident='', content=award
            )  # TODO: add proper classification for an Awards Statement
            data['referred_to_by'].append(cite)
Exemplo n.º 3
0
def extract_monetary_amount(data,
                            add_citations=False,
                            currency_mapping=CURRENCY_MAPPING,
                            source_mapping=None,
                            truncate_label_digits=2):
    '''
    Returns a `MonetaryAmount`, `EstimatedPrice`, `StartingPrice`, or
    `AskingPrice` object based on properties of the supplied `data` dict.
    If no price keys are present, or neither an amount nor a currency value
    is found, returns `None`.

    The kind of price is selected by the first of the following key groups
    that is present in `data`. Within each list below, the first key present
    in `data` wins.

    For `MonetaryAmount` prices (`price`, `price_amount` or `amount` present):
        - amount: `price_amount`, `price` or `amount`
        - currency: `currency`, `price_currency` or `price_curr`
        - note: `price_note`, `price_desc` or `note`
        - bibliographic statement: `price_citation` or `citation`

    For `EstimatedPrice` (`est_price` or `est_price_amount` present):
        - amount: `est_price_amount` or `est_price`
        - currency: `est_price_currency`, `est_price_curr` or `currency`
        - note: `est_price_note`, `est_price_desc` or `note`
        - bibliographic statement: `est_price_citation` or `citation`

    For `StartingPrice` (`start_price` or `start_price_amount` present):
        - amount: `start_price_amount` or `start_price`
        - currency: `start_price_currency`, `start_price_curr` or `currency`
        - note: `start_price_note`, `start_price_desc` or `note`
        - bibliographic statement: `start_price_citation` or `citation`

    For `AskingPrice` (`ask_price` or `ask_price_amount` present):
        - amount: `ask_price_amount` or `ask_price`
        - currency: `ask_price_currency`, `ask_price_curr` or `currency`
        - note: `ask_price_note`, `ask_price_desc` or `note`
        - bibliographic statement: `ask_price_citation` or `citation`

    Parameters:
        - data: dict holding the price fields described above.
        - add_citations: when true, a found citation is attached to the
          amount as a bibliography statement.
        - currency_mapping: dict from lower-cased currency strings to either
          a currency object or a key of `vocab.instances`.
        - source_mapping: not used by this function; accepted so existing
          callers keep working.
        - truncate_label_digits: number of decimal places used when a numeric
          amount is formatted for the label.

    An amount that cannot be parsed as a number is kept as the label and as
    a name of the returned object instead of a numeric value.
    '''
    if 'price' in data or 'price_amount' in data or 'amount' in data:
        amnt = model.MonetaryAmount(ident='')
        price_amount = data.get('price_amount',
                                data.get('price', data.get('amount')))
        price_currency = data.get(
            'currency', data.get('price_currency', data.get('price_curr')))
        note = data.get('price_note', data.get('price_desc', data.get('note')))
        cite = data.get('price_citation', data.get('citation'))
    elif 'est_price' in data or 'est_price_amount' in data:
        amnt = vocab.EstimatedPrice(ident='')
        price_amount = data.get('est_price_amount', data.get('est_price'))
        price_currency = data.get(
            'est_price_currency',
            data.get('est_price_curr', data.get('currency')))
        note = data.get('est_price_note',
                        data.get('est_price_desc', data.get('note')))
        cite = data.get('est_price_citation', data.get('citation'))
    elif 'start_price' in data or 'start_price_amount' in data:
        amnt = vocab.StartingPrice(ident='')
        price_amount = data.get('start_price_amount', data.get('start_price'))
        price_currency = data.get(
            'start_price_currency',
            data.get('start_price_curr', data.get('currency')))
        note = data.get('start_price_note',
                        data.get('start_price_desc', data.get('note')))
        cite = data.get('start_price_citation', data.get('citation'))
    elif 'ask_price' in data or 'ask_price_amount' in data:
        amnt = vocab.AskingPrice(ident='')
        price_amount = data.get('ask_price_amount', data.get('ask_price'))
        price_currency = data.get(
            'ask_price_currency',
            data.get('ask_price_curr', data.get('currency')))
        note = data.get('ask_price_note',
                        data.get('ask_price_desc', data.get('note')))
        cite = data.get('ask_price_citation', data.get('citation'))
    else:
        return None

    if not (price_amount or price_currency):
        return None

    if cite and add_citations:
        amnt.referred_to_by = vocab.BibliographyStatement(ident='',
                                                          content=cite)
    if note:
        amnt.referred_to_by = vocab.Note(ident='', content=note)

    # Label text for the amount; replaced by a formatted number when the
    # amount parses cleanly.
    price_amount_label = price_amount
    if price_amount:
        try:
            # str() lets already-numeric amounts through instead of failing
            # on the string methods below.
            value = str(price_amount)
            # Drop uncertainty markers before parsing.
            value = value.replace('[?]', '').replace('?', '').strip()
            # Treat commas as thousands separators only when one is followed
            # by three digits.
            if re.search(r',\d\d\d', value):
                value = value.replace(',', '')
            value = float(value)

            label_fmt = '{:,.%df}' % truncate_label_digits
            price_amount_label = label_fmt.format(value)

            amnt.value = value
        except ValueError:
            # Not a numeric amount: keep the raw text as label and name.
            amnt._label = price_amount
            amnt.identified_by = model.Name(ident='', content=price_amount)

    if price_currency:
        price_currency_key = price_currency
        try:
            price_currency_key = currency_mapping[price_currency_key.lower()]
        except KeyError:
            # Unmapped currencies are looked up under their original spelling.
            pass
        if isinstance(price_currency_key, model.BaseResource):
            amnt.currency = price_currency_key
        elif price_currency_key in vocab.instances:
            amnt.currency = vocab.instances[price_currency_key]
        else:
            warnings.warn('*** No currency instance defined for %s' %
                          (price_currency_key, ))

    if price_amount_label and price_currency:
        amnt._label = '%s %s' % (price_amount_label, price_currency)
    elif price_amount:
        amnt._label = '%s' % (price_amount, )
    return amnt
Exemplo n.º 4
0
    def add_props(self, data: dict, role=None, **kwargs):
        '''
        Normalise the descriptive properties of a person/group record in place.

        - `data['nationality']` (a string or a list of strings) is replaced by
          a list of the matching `vocab.instances` entries; a warning is
          issued for every nationality without a matching instance.
        - `data['referred_to_by']` is created if missing and receives notes
          built from `notes`, `brief_notes` and `working_notes` (each split
          on `;`) and bibliography statements built from `name_cite` and
          `bibliography`.
        - If `self.clamped_timespan_args` returns arguments for the record, a
          professional activity is appended to `data['events']`.
        - If `data['auth_name']` denotes an anonymous group,
          `data['label']` is replaced by a group label and, for dated groups,
          a professional activity for the century is appended to
          `data['events']`.

        `role` defaults to `'person'` and is used when building group labels.
        Extra keyword arguments are accepted and ignored. Nothing is returned.
        '''
        role = role if role else 'person'
        auth_name = data.get('auth_name', '')
        period_match = self.anon_period_re.match(auth_name)
        nationalities = []
        if 'nationality' in data:
            nationality = data['nationality']
            if isinstance(nationality, str):
                nationalities.append(nationality.lower())
            elif isinstance(nationality, list):
                nationalities += [n.lower() for n in nationality]

        data['nationality'] = []
        data.setdefault('referred_to_by', [])

        name = data['label']
        active = self.clamped_timespan_args(data, name)
        if active:
            pact_uri = data['uri'] + '-ProfAct-active'
            a = self.professional_activity(name, ident=pact_uri, **active)
            # 'events' is not guaranteed to exist at this point.
            data.setdefault('events', []).append(a)

        for key in ('notes', 'brief_notes', 'working_notes'):
            # `or ''` tolerates a key that is present with a None value.
            for content in (data.get(key) or '').split(';'):
                content = content.strip()
                if content:
                    note = vocab.Note(ident='', content=content)
                    data['referred_to_by'].append(note)

        # Each citation field is emitted exactly once.
        for key in ('name_cite', 'bibliography'):
            if data.get(key):
                cite = vocab.BibliographyStatement(ident='', content=data[key])
                data['referred_to_by'].append(cite)

        if self.is_anonymous_group(auth_name):
            nationality_match = self.anon_nationality_re.match(auth_name)
            dated_nationality_match = self.anon_dated_nationality_re.match(
                auth_name)
            dated_match = self.anon_dated_re.match(auth_name)
            data.setdefault('events', [])
            # A ValueError raised inside a branch abandons the rest of that
            # branch; whatever was already recorded is kept.
            if nationality_match:
                with suppress(ValueError):
                    nationality = nationality_match.group(1).lower()
                    nationalities.append(nationality)
                    group_label = self.anonymous_group_label(
                        role, nationality=nationality)
                    data['label'] = group_label
            elif dated_nationality_match:
                with suppress(ValueError):
                    nationality = dated_nationality_match.group(1).lower()
                    nationalities.append(nationality)
                    century = int(dated_nationality_match.group(2))
                    group_label = self.anonymous_group_label(
                        role, century=century, nationality=nationality)
                    data['label'] = group_label
                    pact_uri = data['uri'] + '-ProfAct-dated-natl'
                    a = self.professional_activity(group_label,
                                                   ident=pact_uri,
                                                   century=century,
                                                   narrow=True)
                    data['events'].append(a)
            elif dated_match:
                with suppress(ValueError):
                    century = int(dated_match.group(1))
                    group_label = self.anonymous_group_label(role,
                                                             century=century)
                    data['label'] = group_label
                    pact_uri = data['uri'] + '-ProfAct-dated'
                    a = self.professional_activity(group_label,
                                                   ident=pact_uri,
                                                   century=century,
                                                   narrow=True)
                    data['events'].append(a)
            elif period_match:
                period = period_match.group(1).lower()
                data['label'] = f'anonymous {period} {role}s'

        for nationality in nationalities:
            key = f'{nationality.lower()} nationality'
            n = vocab.instances.get(key)
            if n:
                data['nationality'].append(n)
            else:
                warnings.warn(
                    f'No nationality instance found in crom for: {key!r}')
Exemplo n.º 5
0
def extract_monetary_amount(data):
    '''
    Returns a `MonetaryAmount`, `StartingPrice`, or `EstimatedPrice` object
    based on properties of the supplied `data` dict. If neither an amount nor
    a currency is found, returns `None`.

    For `EstimatedPrice` (selected when `est_price` or `est_price_amount` is
    present), values will be accessed from these keys:
        - amount: `est_price_amount` or `est_price`
        - currency: `est_price_currency` or `est_price_curr`
        - note: `est_price_note` or `est_price_desc`
        - bibliographic statement: `est_price_citation`

    For `StartingPrice` (selected when `start_price` or `start_price_amount`
    is present), values will be accessed from these keys:
        - amount: `start_price_amount` or `start_price`
        - currency: `start_price_currency` or `start_price_curr`
        - note: `start_price_note` or `start_price_desc`
        - bibliographic statement: `start_price_citation`

    For `MonetaryAmount` prices (all other cases), values will be accessed
    from these keys:
        - amount: `price_amount` or `price`
        - currency: `price_currency` or `price_curr`
        - note: `price_note` or `price_desc`
        - bibliographic statement: `price_citation`

    Currency strings are translated through `CURRENCY_MAPPING` using their
    lower-cased form before being looked up in `vocab.instances`. An amount
    that cannot be parsed as a number is kept as the label and as a name of
    the returned object instead of a numeric value.
    '''
    if 'est_price' in data or 'est_price_amount' in data:
        amnt = vocab.EstimatedPrice()
        price_amount = data.get('est_price_amount', data.get('est_price'))
        price_currency = data.get('est_price_currency',
                                  data.get('est_price_curr'))
        note = data.get('est_price_note', data.get('est_price_desc'))
        cite = data.get('est_price_citation')
    elif 'start_price' in data or 'start_price_amount' in data:
        amnt = vocab.StartingPrice()
        price_amount = data.get('start_price_amount', data.get('start_price'))
        price_currency = data.get('start_price_currency',
                                  data.get('start_price_curr'))
        note = data.get('start_price_note', data.get('start_price_desc'))
        cite = data.get('start_price_citation')
    else:
        amnt = model.MonetaryAmount()
        price_amount = data.get('price_amount', data.get('price'))
        price_currency = data.get('price_currency', data.get('price_curr'))
        note = data.get('price_note', data.get('price_desc'))
        cite = data.get('price_citation')

    if not (price_amount or price_currency):
        return None

    if cite:
        amnt.referred_to_by = vocab.BibliographyStatement(content=cite)
    if note:
        amnt.referred_to_by = vocab.Note(content=note)

    # Remember that an amount was supplied before it is converted: a parsed
    # value of 0.0 is falsy and must not suppress the label below.
    has_amount = bool(price_amount)
    if has_amount:
        try:
            # str() lets already-numeric amounts through; the replacements
            # drop uncertainty markers before parsing.
            value = str(price_amount)
            value = value.replace('[?]', '').replace('?', '').strip()
            price_amount = float(value)
            amnt.value = price_amount
        except ValueError:
            # Not a numeric amount: keep the raw text as label and name.
            amnt._label = price_amount
            amnt.identified_by = model.Name(content=price_amount)

    if price_currency:
        if isinstance(price_currency, str):
            # Look the currency up by its lower-cased form; testing
            # membership with the raw string skipped mixed-case values.
            # Unmapped currencies are left unchanged.
            price_currency = CURRENCY_MAPPING.get(price_currency.lower(),
                                                  price_currency)
        if price_currency in vocab.instances:
            amnt.currency = vocab.instances[price_currency]
        else:
            warnings.warn('*** No currency instance defined for %s' %
                          (price_currency, ))

    if has_amount and price_currency:
        amnt._label = '%s %s' % (price_amount, price_currency)
    elif has_amount:
        amnt._label = '%s' % (price_amount, )
    return amnt
Exemplo n.º 6
0
    def __call__(self, data, non_auctions, event_properties,
                 problematic_records, transaction_types):
        '''
        Add modeling data for the auction of a lot of objects.

        This is a generator: it yields `data` once after adding modeling
        information to it, or yields nothing when the record asserts an
        asking price.

        Parameters, as used by this method:
            - data: record dict; `data['_object']` and
              `data['auction_of_lot']` are required.
            - non_auctions: mapping from catalog number to sale type;
              catalogs not listed are treated as 'Auction'.
            - event_properties: dict providing the 'auction_houses',
              'auction_locations' and 'auction_dates' lookups, each keyed by
              catalog number.
            - problematic_records: dict whose optional 'lots' entry holds
              (lot key, problem text) pairs.
            - transaction_types: dict providing the 'sold' and 'withdrawn'
              collections of transaction values.

        Keys written to `data`: 'lot_object_id' always; 'ask_price' (in place
        of 'price') for non-auction sales that carry a price; and, unless the
        transaction is a withdrawal, '_prov_entry_data' and
        '_event_causing_prov_entry'.

        Raises whatever `object_key` raises, after emitting a warning and
        dumping the non-empty fields of `data` to stderr.
        '''
        self.helper.copy_source_information(data['_object'], data)

        auction_houses_data = event_properties['auction_houses']

        auction_locations = event_properties['auction_locations']
        auction_data = data['auction_of_lot']
        try:
            lot_object_key = object_key(auction_data)
        except Exception as e:
            # Report the failure and the non-empty fields of the record, then
            # re-raise the original exception.
            warnings.warn(
                f'Failed to compute lot object key from data {auction_data} ({e})'
            )
            pprint.pprint({k: v
                           for k, v in data.items() if v != ''},
                          stream=sys.stderr)
            raise
        # The key unpacks into catalog number, lot number and date.
        cno, lno, date = lot_object_key
        sale_type = non_auctions.get(cno, 'Auction')

        ask_price = data.get('ask_price', {}).get('ask_price')
        if ask_price:
            # if there is an asking price/currency, it's a direct sale, not an auction;
            # filter these out from subsequent modeling of auction lots.
            warnings.warn(
                f'Skipping {cno} {lno} because it asserts an asking price')
            # Returning before the `yield` ends the generator without
            # producing a record.
            return

        if sale_type != 'Auction':
            # the records in this sales catalog do not represent auction sales, so the
            # price data should not be asserted as a sale price, but instead as an
            # asking price.
            with suppress(KeyError):
                # A missing 'price' key skips the whole block.
                prices = data['price']
                del data['price']
                if prices:
                    # Only the first price entry is considered.
                    price_data = prices[0]
                    price = get_crom_object(price_data)
                    if price:
                        ma = vocab.add_classification(price, vocab.AskingPrice)
                        data['ask_price'] = add_crom_data(price_data, ma)

        # NOTE(review): `shared_lot_number` is not used later in this method.
        shared_lot_number = self.helper.shared_lot_number_from_lno(lno)
        uid, uri = self.helper.shared_lot_number_ids(cno, lno, date)
        sale_data = {'uid': uid, 'uri': uri}

        lot = self.helper.sale_for_sale_type(sale_type, lot_object_key)
        data['lot_object_id'] = f'{cno} {lno} ({date})'

        if 'link_to_pdf' in auction_data:
            # Reference the linked PDF from the lot as a web page.
            url = auction_data['link_to_pdf']
            page = vocab.WebPage(ident=url, label=url)
            lot.referred_to_by = page

        for problem_key, problem in problematic_records.get('lots', []):
            # TODO: this is inefficient, but will probably be OK so long as the number
            #       of problematic records is small. We do it this way because we can't
            #       represent a tuple directly as a JSON dict key, and we don't want to
            #       have to do post-processing on the services JSON files after loading.
            if tuple(problem_key) == lot_object_key:
                # Attach the problem text to the lot as a note classified as
                # a 'Problematic Record'.
                note = model.LinguisticObject(ident='', content=problem)
                problem_classification = model.Type(
                    ident=self.helper.problematic_record_uri,
                    label='Problematic Record')
                problem_classification.classified_as = vocab.instances[
                    "brief text"]
                note.classified_as = problem_classification
                lot.referred_to_by = note

        # Combine the available transaction source fields into a single
        # bibliography statement on the lot.
        cite_content = []
        if data.get('transaction_so'):
            cite_content.append(data['transaction_so'])
        if data.get('transaction_cite'):
            cite_content.append(data['transaction_cite'])
        if cite_content:
            content = ', '.join(cite_content)
            cite = vocab.BibliographyStatement(
                ident='', content=content, label='Source of transaction type')
            cite.identified_by = model.Name(
                ident='', content='Source of transaction type')
            lot.referred_to_by = cite

        transaction = data.get('transaction')
        SOLD = transaction_types['sold']
        WITHDRAWN = transaction_types['withdrawn']
        self.set_lot_objects(lot, cno, lno, sale_data['uri'], data, sale_type)
        auction, _, _ = self.helper.sale_event_for_catalog_number(
            cno, sale_type)
        if transaction not in WITHDRAWN:
            # Only lots that were not withdrawn are linked to the sale event
            # and given a provenance entry.
            lot.part_of = auction
            event_dates = event_properties['auction_dates'].get(cno)

            # Each auction house record is copied before being passed to the
            # helper.
            auction_houses = [
                get_crom_object(self.helper.add_auction_house_data(h.copy()))
                for h in auction_houses_data.get(cno, [])
            ]

            self.set_lot_auction_houses(lot, cno, auction_houses)
            self.set_lot_location(lot, cno, auction_locations)
            self.set_lot_date(lot, auction_data, event_dates)
            self.set_lot_notes(lot, auction_data, sale_type)

            # Build the provenance entry caused by this lot's sale.
            tx_uri = self.helper.transaction_uri_for_lot(auction_data, data)
            lots = self.helper.lots_in_transaction(auction_data, data)
            tx = vocab.ProvenanceEntry(ident=tx_uri)
            tx_label = prov_entry_label(self.helper, sale_type, transaction,
                                        'of', cno, lots, date)
            tx._label = tx_label
            tx.identified_by = model.Name(ident='', content=tx_label)
            tx.caused_by = lot
            tx_data = {'uri': tx_uri}

            if transaction in SOLD:
                if sale_type == 'Auction':
                    # the records in this sales catalog represent auction sales, so the
                    # price data for a sale should be asserted as a hammer price.
                    with suppress(KeyError):
                        # A missing 'price' key skips the classification.
                        prices = data['price']
                        if prices:
                            # Only the first price entry is classified.
                            price_data = prices[0]
                            price = get_crom_object(price_data)
                            if price:
                                vocab.add_classification(
                                    price, vocab.HammerPrice)

                multi = self.helper.transaction_contains_multiple_lots(
                    auction_data, data)
                if multi:
                    tx_data['multi_lot_tx'] = lots

            # The date is copied only when the lot has a `timespan`
            # attribute.
            with suppress(AttributeError):
                tx_data['_date'] = lot.timespan
            data['_prov_entry_data'] = add_crom_data(data=tx_data, what=tx)

            data['_event_causing_prov_entry'] = add_crom_data(data=sale_data,
                                                              what=lot)
        yield data
Exemplo n.º 7
0
    def add_props(self, data: dict, role=None, **kwargs):
        '''
        Normalise the descriptive properties of a person/group record in place.

        - `data['nationality']` (a `;`-separated string or a list of strings)
          is replaced by a list of the matching `vocab.instances` entries; a
          warning is issued for every nationality without a matching instance.
        - `data['referred_to_by']` is created if missing and receives
          classified statements built from `brief_notes`, `text`,
          `internal_notes` and `working_notes` (each split on `;`) and
          bibliography statements built from `name_cite` and `bibliography`.
        - If `data['auth_name']` denotes an anonymous group,
          `data['label']` is replaced by a group label and, for dated groups,
          a professional activity for the century is appended to
          `data['events']`.

        `role` defaults to `'person'` and is used when building group labels.
        Extra keyword arguments are accepted and ignored. Nothing is returned.
        '''
        role = role if role else 'person'
        auth_name = data.get('auth_name', '')
        period_match = self.anon_period_re.match(auth_name)
        nationalities = []
        if 'nationality' in data:
            nationality = data['nationality']
            if isinstance(nationality, str):
                raw_nationalities = nationality.split(';')
            elif isinstance(nationality, list):
                raw_nationalities = nationality
            else:
                raw_nationalities = []
            for raw in raw_nationalities:
                cleaned = raw.lower().strip()
                # Empty fragments (e.g. from a trailing ';') would only
                # produce a spurious "no instance found" warning.
                if cleaned:
                    nationalities.append(cleaned)

        data['nationality'] = []
        data.setdefault('referred_to_by', [])

        # Disabled: emission of an "active" professional activity.
        # 		name = data['label']
        # 		active = self.clamped_timespan_args(data, name)
        # 		cb = data.get('corporate_body')
        # 		if active:
        # 			pact_uri = data['uri'] + '-ProfAct-active'
        # 			a = self.professional_activity(name, ident=pact_uri, **active)
        # 			data['events'].append(a)

        # Classification pair applied to the statements built from each field.
        notes_field_classification = {
            'brief_notes': (vocab.BiographyStatement, vocab.External),
            'text': (vocab.BiographyStatement, vocab.Internal),
            'internal_notes': (vocab.BiographyStatement, vocab.Internal),
            'working_notes': (vocab.ResearchStatement, vocab.Internal),
        }
        for key, note_classification in notes_field_classification.items():
            # `or ''` tolerates a key that is present with a None value.
            for content in (data.get(key) or '').split(';'):
                content = content.strip()
                if not content:
                    # Do not emit statements with empty content.
                    continue
                cite = vocab.make_multitype_obj(*note_classification,
                                                ident='',
                                                content=content)
                data['referred_to_by'].append(cite)

        # Each citation field is emitted exactly once.
        for key in ('name_cite', 'bibliography'):
            if data.get(key):
                cite = vocab.BibliographyStatement(ident='', content=data[key])
                data['referred_to_by'].append(cite)

        if self.is_anonymous_group(auth_name):
            nationality_match = self.anon_nationality_re.match(auth_name)
            dated_nationality_match = self.anon_dated_nationality_re.match(
                auth_name)
            dated_match = self.anon_dated_re.match(auth_name)
            data.setdefault('events', [])
            # A ValueError raised inside a branch abandons the rest of that
            # branch; whatever was already recorded is kept.
            if nationality_match:
                with suppress(ValueError):
                    nationality = nationality_match.group(1).lower()
                    nationalities.append(nationality)
                    group_label = self.anonymous_group_label(
                        role, nationality=nationality)
                    data['label'] = group_label
            elif dated_nationality_match:
                with suppress(ValueError):
                    nationality = dated_nationality_match.group(1).lower()
                    nationalities.append(nationality)
                    century = int(dated_nationality_match.group(2))
                    group_label = self.anonymous_group_label(
                        role, century=century, nationality=nationality)
                    data['label'] = group_label
                    pact_uri = data['uri'] + '-ProfAct-dated-natl'
                    a = self.professional_activity(
                        group_label,
                        classified_as=[vocab.ActiveOccupation],
                        ident=pact_uri,
                        century=century,
                        narrow=True)
                    data['events'].append(a)
            elif dated_match:
                with suppress(ValueError):
                    century = int(dated_match.group(1))
                    group_label = self.anonymous_group_label(role,
                                                             century=century)
                    data['label'] = group_label
                    pact_uri = data['uri'] + '-ProfAct-dated'
                    a = self.professional_activity(
                        group_label,
                        classified_as=[vocab.ActiveOccupation],
                        ident=pact_uri,
                        century=century,
                        narrow=True)
                    data['events'].append(a)
            elif period_match:
                period = period_match.group(1).lower()
                data['label'] = f'anonymous {period} {role}s'

        for nationality in nationalities:
            key = f'{nationality.lower()} nationality'
            n = vocab.instances.get(key)
            if n:
                data['nationality'].append(n)
            else:
                warnings.warn(
                    f'No nationality instance found in crom for: {key!r}')