def import_digitizedwork(self, htid):
    '''Import a single work into the database.

    Retrieves bibliographic data from Hathi api via
    :meth:`DigitizedWork.add_from_hathi`. Whether an existing record
    gets updated is decided there, based on the ``update`` option
    passed through from ``self.options``.

    Admin log entries created for this record since the start of the
    call are used to determine what happened, and ``self.stats`` is
    updated accordingly: ``error``, ``skipped`` (no log entry),
    ``updated`` (change entry) or ``created`` (addition entry).

    :param htid: HathiTrust identifier for the work to import
    :returns: None if there is an error retrieving bibliographic data;
        otherwise the :class:`~ppa.archive.models.DigitizedWork`
        (including when the record was skipped because no update was
        needed)
    '''
    # NOTE(review): an earlier version of this docstring said None is
    # also returned when no update is needed, but the code has always
    # returned the record in that case; confirm which one callers expect.

    # store the current time to find log entries created after
    before = now()

    try:
        digwork = DigitizedWork.add_from_hathi(
            htid, self.bib_api, update=self.options['update'],
            log_msg_src='via hathi_import script')
    except HathiItemNotFound:
        self.stdout.write("Error: Bibliographic data not found for '%s'" % htid)
        self.stats['error'] += 1
        return None

    # check log entries for this record to determine what was done;
    # fetch the entry once instead of calling exists() and then first()
    # repeatedly, so only a single query is issued
    log_entry = LogEntry.objects.filter(
        content_type_id=self.digwork_content_type.pk,
        object_id=digwork.pk,
        action_time__gte=before).first()

    # no log entry - nothing was done (not new, no update needed)
    if log_entry is None:
        # local copy is newer than last source modification date
        if self.verbosity > self.v_normal:
            self.stdout.write(
                'Source record last updated %s, no update needed'
                % digwork.updated.date())
        # nothing to do; count as skipped
        self.stats['skipped'] += 1

    elif log_entry.action_flag == CHANGE:
        # report if record was changed and update not forced
        if not self.options['update']:
            self.stdout.write(
                'Source record last updated %s, update needed'
                % digwork.updated.date())
        # count the update
        self.stats['updated'] += 1

    elif log_entry.action_flag == ADDITION:
        # count the new record
        self.stats['created'] += 1

    return digwork
def add_items(self, log_msg_src=None, user=None):
    '''Add new items from HathiTrust.

    Iterates over ``self.htids`` and imports each one with
    :meth:`DigitizedWork.add_from_hathi` (``get_data=True``).
    Successfully imported works are appended to
    ``self.imported_works`` and marked ``self.SUCCESS`` in
    ``self.results``; for handled failures the exception itself is
    stored in ``self.results`` and any record with that source id is
    deleted.

    The indexing signal handler is disconnected for the duration of the
    import and is always reconnected afterwards, even if an unexpected
    error interrupts the loop.

    :param log_msg_src: optional source of change to be included in
        log entry message
    :param user: optional user, passed through to
        :meth:`DigitizedWork.add_from_hathi`
    '''
    # disconnect indexing signal handler before adding new content
    IndexableSignalHandler.disconnect()

    try:
        for htid in self.htids:
            try:
                digwork = DigitizedWork.add_from_hathi(
                    htid, self.bib_api, get_data=True,
                    log_msg_src=log_msg_src, user=user)
                if digwork:
                    self.imported_works.append(digwork)

                    self.results[htid] = self.SUCCESS
            except (hathi.HathiItemNotFound, JSONDecodeError,
                    hathi.HathiItemForbidden) as err:
                # json decode error occurred 3/26/2019 - catalog was broken
                # and gave a 200 Ok response with PHP error content
                # hopefully temporary, but could occur again...

                # store the actual error as the results, so that
                # downstream code can report as desired
                self.results[htid] = err

                # remove the partial record if one was created
                # (i.e. if metadata succeeded but data failed)
                DigitizedWork.objects.filter(source_id=htid).delete()
    finally:
        # reconnect indexing signal handler; done in a finally clause so
        # an unhandled exception cannot leave indexing disconnected
        IndexableSignalHandler.connect()
def test_add_from_hathi(self, mock_hathibib_api, mock_get_hathi_data, mock_pop_from_bibdata): script_user = User.objects.get(username=settings.SCRIPT_USERNAME) # add new with default opts test_htid = 'abc:12345' digwork = DigitizedWork.add_from_hathi(test_htid) assert isinstance(digwork, DigitizedWork) mock_hathibib_api.assert_called_with() mock_hathibib = mock_hathibib_api.return_value mock_hathibib.record.assert_called_with('htid', test_htid) mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value) mock_get_hathi_data.assert_not_called() # log entry should exist for record creation only log_entries = LogEntry.objects.filter(object_id=digwork.id) # should only be one log entry assert log_entries.count() == 1 log_entry = log_entries.first() assert log_entry.user == script_user assert log_entry.content_type == ContentType.objects.get_for_model(DigitizedWork) # default log message for new record assert log_entry.change_message == 'Created from HathiTrust bibliographic data' assert log_entry.action_flag == ADDITION # add new with bib api pased in, get data, and custom message my_bib_api = Mock() mock_hathibib_api.reset_mock() test_htid = 'def:678910' digwork = DigitizedWork.add_from_hathi( test_htid, bib_api=my_bib_api, get_data=True, log_msg_src='in unit tests') mock_hathibib_api.assert_not_called() my_bib_api.record.assert_called_with('htid', test_htid) assert mock_get_hathi_data.call_count == 1 log_entry = LogEntry.objects.get(object_id=digwork.id) assert log_entry.change_message == 'Created in unit tests' # update existing record - no change on hathi, not forced digwork_updated = digwork.updated # store local record updated time mockhathirecord = mock_hathibib.record.return_value # set hathi record last updated before digwork last update mockhathirecord.copy_last_updated.return_value = date.today() - timedelta(days=1) digwork = DigitizedWork.add_from_hathi(test_htid) # bib api should still be called 
mock_hathibib.record.assert_called_with('htid', test_htid) # record update time should be unchanged assert digwork.updated == digwork_updated # still only one log entry assert LogEntry.objects.filter(object_id=digwork.id).count() == 1 # update existing record - no change on hathi, update forced mock_pop_from_bibdata.reset_mock() digwork = DigitizedWork.add_from_hathi(test_htid, update=True) # record update time should be changed assert digwork.updated != digwork_updated mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value) # new log entry should be added assert LogEntry.objects.filter(object_id=digwork.id).count() == 2 # log entry should exist for record update; get newest log_entry = LogEntry.objects.filter(object_id=digwork.id) \ .order_by('-action_time').first() assert log_entry.action_flag == CHANGE assert log_entry.change_message.startswith('Updated') assert '(forced update)' in log_entry.change_message # update existing record - changed on hathi, should auto update # set hathi record last updated *after* digwork last update mock_pop_from_bibdata.reset_mock() mockhathirecord.copy_last_updated.return_value = date.today() + timedelta(days=1) digwork_updated = digwork.updated # store local record updated time digwork = DigitizedWork.add_from_hathi(test_htid) # record update time should be changed assert digwork.updated != digwork_updated mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value) # new log entry should be added assert LogEntry.objects.filter(object_id=digwork.id).count() == 3 # newest log entry should be an update assert LogEntry.objects.filter(object_id=digwork.id) \ .order_by('-action_time').first().action_flag == CHANGE