Exemplo n.º 1
0
    def import_digitizedwork(self, htid):
        '''Import a single work into the database.

        Retrieves bibliographic data from Hathi api by way of
        :meth:`DigitizedWork.add_from_hathi`, passing along the
        user-requested ``update`` option. Admin log entries created
        for this record since the start of the call are then inspected
        to determine whether the record was created, updated, or left
        alone, and the matching counter in ``self.stats`` (``created``,
        ``updated``, ``skipped``, or ``error``) is incremented.

        :param htid: HathiTrust identifier for the work to import
        :returns: None if bibliographic data could not be found for the
            requested id; otherwise the
            :class:`~ppa.archive.models.DigitizedWork`, including when
            no update was needed.
        '''

        # store the current time to find log entries created after
        before = now()

        try:
            digwork = DigitizedWork.add_from_hathi(
                htid,
                self.bib_api,
                update=self.options['update'],
                log_msg_src='via hathi_import script')
        except HathiItemNotFound:
            self.stdout.write("Error: Bibliographic data not found for '%s'" %
                              htid)
            self.stats['error'] += 1
            return None

        # check log entries for this record to determine what was done;
        # fetch the first matching entry once rather than re-running
        # the query for every check below
        log_entry = LogEntry.objects.filter(
            content_type_id=self.digwork_content_type.pk,
            object_id=digwork.pk,
            action_time__gte=before).first()

        # no log entry - nothing was done (not new, no update needed)
        if log_entry is None:
            # local copy is newer than last source modification date
            if self.verbosity > self.v_normal:
                self.stdout.write(
                    'Source record last updated %s, no update needed' %
                    digwork.updated.date())
            # nothing to do; count the record as skipped
            self.stats['skipped'] += 1

        elif log_entry.action_flag == CHANGE:
            # report if record was changed and update not forced
            if not self.options['update']:
                self.stdout.write(
                    'Source record last updated %s, update needed' %
                    digwork.updated.date())
            # count the update
            self.stats['updated'] += 1

        elif log_entry.action_flag == ADDITION:
            # count the new record
            self.stats['created'] += 1

        return digwork
Exemplo n.º 2
0
    def add_items(self, log_msg_src=None, user=None):
        '''Add new items from HathiTrust.

        Attempts to import every id in ``self.htids``. The outcome for
        each id is stored in ``self.results``: ``self.SUCCESS`` when the
        import call completed, or the exception instance when one of the
        handled errors occurred. Works returned by the import are
        appended to ``self.imported_works``.

        The indexing signal handler is disconnected for the duration of
        the import and is always reconnected afterwards, even if an
        unexpected exception interrupts the loop.

        :param log_msg_src: optional source of change to be included in
            log entry message
        :param user: optional user, passed through unchanged to
            :meth:`DigitizedWork.add_from_hathi`
        '''
        # disconnect indexing signal handler before adding new content
        IndexableSignalHandler.disconnect()

        try:
            for htid in self.htids:
                try:
                    digwork = DigitizedWork.add_from_hathi(
                        htid,
                        self.bib_api,
                        get_data=True,
                        log_msg_src=log_msg_src,
                        user=user)
                    if digwork:
                        self.imported_works.append(digwork)

                    self.results[htid] = self.SUCCESS
                except (hathi.HathiItemNotFound, JSONDecodeError,
                        hathi.HathiItemForbidden) as err:
                    # json decode error occurred 3/26/2019 - catalog was
                    # broken and gave a 200 Ok response with PHP error
                    # content; hopefully temporary, but could occur again...

                    # store the actual error as the results, so that
                    # downstream code can report as desired
                    self.results[htid] = err

                    # remove the partial record if one was created
                    # (i.e. if metadata succeeded but data failed)
                    DigitizedWork.objects.filter(source_id=htid).delete()
        finally:
            # reconnect indexing signal handler no matter how the loop
            # ended, so an unhandled error does not leave indexing
            # switched off
            IndexableSignalHandler.connect()
Exemplo n.º 3
0
    def test_add_from_hathi(self, mock_hathibib_api, mock_get_hathi_data,
                            mock_pop_from_bibdata):
        '''Check ``DigitizedWork.add_from_hathi`` for new and existing
        records.

        Covers, in order: creating a record with default options;
        creating a record with a supplied bib api, data retrieval, and a
        custom log message; re-importing with no source change (no
        update); re-importing with a forced update; and re-importing
        after the source record changed. Log entries are checked at
        each step. The steps share mock and database state, so their
        order matters.
        '''

        # user expected on the log entry when no user is passed in
        script_user = User.objects.get(username=settings.SCRIPT_USERNAME)

        # add new with default opts
        test_htid = 'abc:12345'
        digwork = DigitizedWork.add_from_hathi(test_htid)
        assert isinstance(digwork, DigitizedWork)
        # no bib api was supplied, so one is initialized without arguments
        mock_hathibib_api.assert_called_with()
        mock_hathibib = mock_hathibib_api.return_value
        mock_hathibib.record.assert_called_with('htid', test_htid)
        mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value)
        # get_data was not requested, so the mocked data method is unused
        mock_get_hathi_data.assert_not_called()

        # log entry should exist for record creation only
        log_entries = LogEntry.objects.filter(object_id=digwork.id)
        # should only be one log entry
        assert log_entries.count() == 1
        log_entry = log_entries.first()
        assert log_entry.user == script_user
        assert log_entry.content_type == ContentType.objects.get_for_model(DigitizedWork)
        # default log message for new record
        assert log_entry.change_message == 'Created from HathiTrust bibliographic data'
        assert log_entry.action_flag == ADDITION

        # add new with bib api passed in, get data, and custom message
        my_bib_api = Mock()
        # clear calls recorded above so assert_not_called reflects this step
        mock_hathibib_api.reset_mock()
        test_htid = 'def:678910'
        digwork = DigitizedWork.add_from_hathi(
            test_htid, bib_api=my_bib_api, get_data=True,
            log_msg_src='in unit tests')
        # supplied bib api should be used instead of initializing a new one
        mock_hathibib_api.assert_not_called()
        my_bib_api.record.assert_called_with('htid', test_htid)
        assert mock_get_hathi_data.call_count == 1
        log_entry = LogEntry.objects.get(object_id=digwork.id)
        # custom message source replaces the default message text
        assert log_entry.change_message == 'Created in unit tests'

        # update existing record - no change on hathi, not forced
        digwork_updated = digwork.updated  # store local record updated time
        mockhathirecord = mock_hathibib.record.return_value
        # set hathi record last updated before digwork last update
        mockhathirecord.copy_last_updated.return_value = date.today() - timedelta(days=1)
        digwork = DigitizedWork.add_from_hathi(test_htid)
        # bib api should still be called
        mock_hathibib.record.assert_called_with('htid', test_htid)
        # record update time should be unchanged
        assert digwork.updated == digwork_updated
        # still only one log entry
        assert LogEntry.objects.filter(object_id=digwork.id).count() == 1

        # update existing record - no change on hathi, update forced
        mock_pop_from_bibdata.reset_mock()
        digwork = DigitizedWork.add_from_hathi(test_htid, update=True)
        # record update time should be changed
        assert digwork.updated != digwork_updated
        mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value)
        # new log entry should be added
        assert LogEntry.objects.filter(object_id=digwork.id).count() == 2
        # log entry should exist for record update; get newest
        log_entry = LogEntry.objects.filter(object_id=digwork.id) \
            .order_by('-action_time').first()
        assert log_entry.action_flag == CHANGE
        assert log_entry.change_message.startswith('Updated')
        assert '(forced update)' in log_entry.change_message

        # update existing record - changed on hathi, should auto update
        # set hathi record last updated *after* digwork last update
        mock_pop_from_bibdata.reset_mock()
        mockhathirecord.copy_last_updated.return_value = date.today() + timedelta(days=1)
        digwork_updated = digwork.updated  # store local record updated time
        digwork = DigitizedWork.add_from_hathi(test_htid)
        # record update time should be changed
        assert digwork.updated != digwork_updated
        mock_pop_from_bibdata.assert_called_with(mock_hathibib.record.return_value)
        # new log entry should be added
        assert LogEntry.objects.filter(object_id=digwork.id).count() == 3
        # newest log entry should be an update
        assert LogEntry.objects.filter(object_id=digwork.id) \
            .order_by('-action_time').first().action_flag == CHANGE