def append_identifier(d, key, obj, type):
    ids = exists_to_none(d, key)
    if ids:
        if isinstance(ids, list):
            for id in ids:
                obj.identifiers.append(Identifier(id, type=type))
        else:
            obj.identifiers.append(Identifier(ids, type=type))

def import_from_feed(self, feed, even_if_no_author=False,
                     immediately_presentation_ready=False, feed_url=None):

    # Keep track of editions that were imported. Pools and works
    # for those editions may be looked up or created.
    imported_editions = {}
    pools = {}
    works = {}
    # CoverageFailures that note business logic errors and non-success
    # download statuses.
    failures = {}

    # If parsing the overall feed throws an exception, we should address
    # that before moving on. Let the exception propagate.
    metadata_objs, failures = self.extract_feed_data(feed, feed_url)

    # Make editions. If there's a problem, make sure the associated pool
    # and work aren't created.
    for key, metadata in metadata_objs.iteritems():
        # key is identifier.urn here

        # If there's a status message about this item, don't try to
        # import it.
        if key in failures.keys():
            continue

        try:
            # Create an edition. This will also create a pool if there's
            # circulation data.
            edition = self.import_edition_from_metadata(
                metadata, even_if_no_author, immediately_presentation_ready
            )
            if edition:
                imported_editions[key] = edition
        except Exception, e:
            # Rather than scratch the whole import, treat this as a
            # failure that only applies to this item.
            self.log.error("Error importing an OPDS item", exc_info=e)
            identifier, ignore = Identifier.parse_urn(self._db, key)
            data_source = self.data_source
            failure = CoverageFailure(
                identifier, traceback.format_exc(),
                data_source=data_source, transient=False
            )
            failures[key] = failure
            # Clean up any edition we might have created.
            if key in imported_editions:
                del imported_editions[key]
            # Move on to the next item; don't create a work.
            continue

        try:
            pool, work = self.update_work_for_edition(
                edition, even_if_no_author, immediately_presentation_ready
            )
            if pool:
                pools[key] = pool
            if work:
                works[key] = work
        except Exception, e:
            identifier, ignore = Identifier.parse_urn(self._db, key)
            data_source = self.data_source
            failure = CoverageFailure(
                identifier, traceback.format_exc(),
                data_source=data_source, transient=False
            )
            failures[key] = failure

def categories(cls, work):
    """Send out _all_ categories for the work.

    (So long as the category type has a URI associated with it in
    Subject.uri_lookup.)
    """
    _db = Session.object_session(work)
    by_scheme_and_term = dict()
    identifier_ids = work.all_identifier_ids()
    classifications = Identifier.classifications_for_identifier_ids(
        _db, identifier_ids)
    for c in classifications:
        subject = c.subject
        if subject.type in Subject.uri_lookup:
            scheme = Subject.uri_lookup[subject.type]
            term = subject.identifier
            weight_field = AtomFeed.schema_("ratingValue")
            key = (scheme, term)
            if not key in by_scheme_and_term:
                value = dict(term=subject.identifier)
                if subject.name:
                    value['label'] = subject.name
                value[weight_field] = 0
                by_scheme_and_term[key] = value
            by_scheme_and_term[key][weight_field] += c.weight

    # Collapse by_scheme_and_term to by_scheme
    by_scheme = defaultdict(list)
    for (scheme, term), value in by_scheme_and_term.items():
        by_scheme[scheme].append(value)
    by_scheme.update(super(VerboseAnnotator, cls).categories(work))
    return by_scheme

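# A minimal, self-contained sketch of the aggregation pattern used above:
# classifications are first keyed by (scheme, term) so weights from multiple
# sources accumulate, then collapsed to one list per scheme. The sample data
# and the plain 'weight' key are illustrative only; the real method uses
# AtomFeed.schema_("ratingValue") and live Subject/Classification records.
from collections import defaultdict

def collapse_classifications(classifications):
    """classifications: iterable of (scheme, term, weight) tuples."""
    by_scheme_and_term = {}
    for scheme, term, weight in classifications:
        key = (scheme, term)
        if key not in by_scheme_and_term:
            by_scheme_and_term[key] = dict(term=term, weight=0)
        by_scheme_and_term[key]['weight'] += weight
    by_scheme = defaultdict(list)
    for (scheme, term), value in by_scheme_and_term.items():
        by_scheme[scheme].append(value)
    return by_scheme

# Example: two sources classify the same term; their weights are summed.
#   collapse_classifications([
#       ('http://schema.org/audience', 'Adult', 1),
#       ('http://schema.org/audience', 'Adult', 100),
#       ('http://librarysimplified.org/terms/fiction/', 'Fiction', 10),
#   ])['http://schema.org/audience']
#   -> [{'term': 'Adult', 'weight': 101}]
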
def _retrieve_links(self, publications):
    if self.lamr is None:
        return
    pubs_by_uids = {}
    for pub in publications:
        for id in Identifier.find_by_type(pub.identifiers, 'WOK'):
            pubs_by_uids[id.value] = pub
    uids = pubs_by_uids.keys()
    result_by_uids = self.lamr.retrieve_by_ids(uids)
    for uid, result in result_by_uids.iteritems():
        pub = pubs_by_uids[uid]
        if 'timesCited' in result:
            pub.times_cited = int(result['timesCited'])
        if 'sourceURL' in result:
            pub.source_urls.append(
                URL(result['sourceURL'], type='WOK',
                    description=u'Web of Science®'))
        if 'citingArticlesURL' in result:
            pub.cite_urls.append(
                URL(result['citingArticlesURL'], type='WOK',
                    description=u'Web of Science®'))
        if 'message' in result:
            pub.errors.append(
                u'Failed loading article URLs: ' + unicode(result['message']))

def test_transient_failure_if_requested_book_not_mentioned(self):
    """Test an unrealistic case where we ask Axis 360 about one book and
    it tells us about a totally different book.
    """
    # We're going to ask about abcdef
    identifier = self._identifier(identifier_type=Identifier.AXIS_360_ID)
    identifier.identifier = 'abcdef'

    # But we're going to get told about 0003642860.
    data = self.get_data("single_item.xml")
    self.api.queue_response(200, content=data)
    [result] = self.provider.process_batch([identifier])

    # Coverage failed for the book we asked about.
    assert isinstance(result, CoverageFailure)
    eq_(identifier, result.obj)
    eq_("Book not in collection", result.exception)

    # And nothing major was done about the book we were told
    # about. We created an Identifier record for its identifier,
    # but no LicensePool or Edition.
    wrong_identifier = Identifier.for_foreign_id(
        self._db, Identifier.AXIS_360_ID, "0003642860"
    )
    eq_([], identifier.licensed_through)
    eq_([], identifier.primarily_identifies)

def process_urn(self, urn, **kwargs):
    """Turn a URN into a Work suitable for use in an OPDS feed.
    """
    try:
        identifier, is_new = Identifier.parse_urn(self._db, urn)
    except ValueError, e:
        identifier = None

def extract_identifier(cls, identifier_tag):
    """Turn a <dcterms:identifier> tag into an IdentifierData object."""
    try:
        type, identifier = Identifier.type_and_identifier_for_urn(
            identifier_tag.text.lower())
        return IdentifierData(type, identifier)
    except ValueError:
        return None

def detail_for_elementtree_entry(
        cls, parser, entry_tag, data_source, feed_url=None,
):
    """Turn an <atom:entry> tag into a dictionary of metadata that can be
    used as keyword arguments to the Metadata constructor.

    :return: A 3-tuple (identifier, kwargs, failure)
    """
    identifier = parser._xpath1(entry_tag, 'atom:id')
    if identifier is None or not identifier.text:
        # This <entry> tag doesn't identify a book so we
        # can't derive any information from it.
        return None, None, None
    identifier = identifier.text

    try:
        data = cls._detail_for_elementtree_entry(
            parser, entry_tag, feed_url
        )
        return identifier, data, None
    except Exception, e:
        _db = Session.object_session(data_source)
        identifier_obj, ignore = Identifier.parse_urn(_db, identifier)
        failure = CoverageFailure(
            identifier_obj, traceback.format_exc(), data_source,
            transient=True
        )
        return identifier, None, failure

def parse_identifier_list(
        cls, _db, identifier_type, arguments, autocreate=False
):
    """Turn a list of identifiers into a list of Identifier objects.

    The list of arguments is probably derived from a command-line
    parser such as the one defined in
    IdentifierInputScript.arg_parser().

    This makes it easy to identify specific identifiers on the
    command line. Examples:

    1 2

    a b c
    """
    current_identifier_type = None
    if len(arguments) == 0:
        return []
    if not identifier_type:
        raise ValueError("No identifier type specified!")
    identifiers = []
    for arg in arguments:
        identifier, ignore = Identifier.for_foreign_id(
            _db, identifier_type, arg, autocreate=autocreate
        )
        if not identifier:
            logging.warn(
                "Could not load identifier %s/%s", identifier_type, arg
            )
        if identifier:
            identifiers.append(identifier)
    return identifiers

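# A standalone sketch of the lookup-or-skip behaviour above, with the
# database call replaced by a plain dict so it runs on its own. Here
# 'known' stands in for Identifier.for_foreign_id; arguments that don't
# resolve are logged and dropped rather than aborting the run.
import logging

def parse_identifier_list_sketch(known, identifier_type, arguments):
    if not arguments:
        return []
    if not identifier_type:
        raise ValueError("No identifier type specified!")
    identifiers = []
    for arg in arguments:
        identifier = known.get((identifier_type, arg))
        if not identifier:
            logging.warning(
                "Could not load identifier %s/%s", identifier_type, arg)
            continue
        identifiers.append(identifier)
    return identifiers

# parse_identifier_list_sketch({('Gutenberg ID', '1'): 'id-1'},
#                              'Gutenberg ID', ['1', '2'])
# -> ['id-1']  (plus a warning for the unresolvable '2')
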
def process_batch(self, identifiers):
    identifier_strings = self.api.create_identifier_strings(identifiers)
    response = self.api.availability(title_ids=identifier_strings)
    seen_identifiers = set()
    batch_results = []
    for metadata, availability in self.parser.process_all(
            response.content):
        identifier, is_new = metadata.primary_identifier.load(self._db)
        if not identifier in identifiers:
            # Theta told us about a book we didn't ask
            # for. This shouldn't happen, but if it does we should
            # do nothing further.
            continue
        seen_identifiers.add(identifier.identifier)
        result = self.set_metadata(identifier, metadata)
        if not isinstance(result, CoverageFailure):
            result = self.handle_success(identifier)
        batch_results.append(result)

    # Create a CoverageFailure object for each original identifier
    # not mentioned in the results.
    for identifier_string in identifier_strings:
        if identifier_string not in seen_identifiers:
            identifier, ignore = Identifier.for_foreign_id(
                self._db, Identifier.THETA_ID, identifier_string)
            result = CoverageFailure(
                identifier, "Book not in collection",
                data_source=self.output_source, transient=False)
            batch_results.append(result)
    return batch_results

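# Sketch of the bookkeeping above: anything we asked about but never saw in
# the response is recorded as a "Book not in collection" failure. Plain
# strings and tuples stand in for Identifier and CoverageFailure objects.
def fill_in_missing(requested, seen, successes):
    results = list(successes)
    for identifier_string in requested:
        if identifier_string not in seen:
            results.append(
                ('failure', identifier_string, 'Book not in collection'))
    return results

# fill_in_missing(['a', 'b'], set(['a']), [('success', 'a')])
# -> [('success', 'a'), ('failure', 'b', 'Book not in collection')]
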
def explain_identifier(cls, identifier, primary, seen, strength, level):
    indent = " " * level
    if primary:
        ident = "Primary identifier"
    else:
        ident = "Identifier"
    if primary:
        strength = 1
    output = "%s %s: %s/%s (q=%s)" % (
        indent, ident, identifier.type, identifier.identifier, strength)
    print output.encode("utf8")
    _db = Session.object_session(identifier)
    classifications = Identifier.classifications_for_identifier_ids(
        _db, [identifier.id])
    for classification in classifications:
        subject = classification.subject
        genre = subject.genre
        if genre:
            genre = genre.name
        else:
            genre = "(!genre)"
        #print "%s %s says: %s/%s %s w=%s" % (
        #    indent, classification.data_source.name,
        #    subject.identifier, subject.name, genre, classification.weight
        #)
    seen.add(identifier)
    for equivalency in identifier.equivalencies:
        if equivalency.id in seen:
            continue
        seen.add(equivalency.id)
        output = equivalency.output
        cls.explain_identifier(output, False, seen,
                               equivalency.strength, level+1)

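# Self-contained sketch of the traversal pattern above: walk an equivalency
# graph recursively while tracking what has already been seen, so mutual
# equivalencies can't cause infinite recursion. A plain adjacency dict
# stands in for Identifier.equivalencies.
def explain(graph, node, seen, level=0):
    lines = ["%s%s" % ("  " * level, node)]
    seen.add(node)
    for neighbor in graph.get(node, []):
        if neighbor in seen:
            continue
        lines.extend(explain(graph, neighbor, seen, level + 1))
    return lines

# explain({'Gutenberg ID/100': ['OCLC Work/60010'],
#          'OCLC Work/60010': ['OCLC Number/9999', 'Gutenberg ID/100']},
#         'Gutenberg ID/100', set())
# -> ['Gutenberg ID/100', '  OCLC Work/60010', '    OCLC Number/9999']
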
def items_that_need_coverage(self, identifiers=None, **kwargs):
    """Find all items lacking coverage from this CoverageProvider.

    Items should be Identifiers, though Editions should also work.

    By default, all identifiers of the `input_identifier_types` which
    don't already have coverage are chosen.

    :param identifiers: The batch of identifier objects to test for
        coverage. identifiers and self.input_identifiers can
        intersect -- if this provider was created for the purpose of
        running specific Identifiers, and within those Identifiers you
        want to batch, you can use both parameters.
    """
    qu = Identifier.missing_coverage_from(
        self._db, self.input_identifier_types, self.output_source,
        count_as_missing_before=self.cutoff_time,
        operation=self.operation, identifiers=self.input_identifiers,
        **kwargs
    )
    if identifiers:
        qu = qu.filter(Identifier.id.in_([x.id for x in identifiers]))
    return qu

def _identifier(self, identifier_type=Identifier.GUTENBERG_ID,
                foreign_id=None):
    if foreign_id:
        id = foreign_id
    else:
        id = self._str
    return Identifier.for_foreign_id(self._db, identifier_type, id)[0]

def search_citations(self, publications):
    for publication in publications:
        ut = list(Identifier.find_by_type(publication.identifiers, 'WOK'))
        if len(ut) == 0:
            continue
        # Note: str.lstrip strips a set of characters, not a prefix, so
        # use an explicit prefix check to drop the leading "WOS:".
        ut = ut[0].value
        if ut.startswith(u'WOS:'):
            ut = ut[len(u'WOS:'):]
        for cite_url in URL.find_by_type(publication.cite_urls, 'WOK'):
            for pub in self._get_citations_from_url(cite_url.value, ut):
                yield pub

def process_urns(self, urns, **process_urn_kwargs):
    """Processes a list of URNs for a lookup request.

    :return: None or, to override default feed behavior, a ProblemDetail
        or Response
    """
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)
    self.add_urn_failure_messages(failures)

    for urn, identifier in identifiers_by_urn.items():
        self.process_identifier(identifier, urn, **process_urn_kwargs)

def test_import_one_feed(self):
    # Check coverage records are created.

    monitor = OPDSImportMonitor(
        self._db, "http://url", DataSource.OA_CONTENT_SERVER,
        DoomedOPDSImporter
    )
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    feed = self.content_server_mini_feed

    monitor.import_one_feed(feed, "http://root-url/")

    editions = self._db.query(Edition).all()

    # One edition has been imported
    eq_(1, len(editions))
    [edition] = editions

    # That edition has a CoverageRecord.
    record = CoverageRecord.lookup(
        editions[0].primary_identifier, data_source,
        operation=CoverageRecord.IMPORT_OPERATION
    )
    eq_(CoverageRecord.SUCCESS, record.status)
    eq_(None, record.exception)

    # The edition's primary identifier has a cover link whose
    # relative URL has been resolved relative to the URL we passed
    # into import_one_feed.
    [cover] = [
        x.resource.url for x in editions[0].primary_identifier.links
        if x.rel == Hyperlink.IMAGE
    ]
    eq_("http://root-url/full-cover-image.png", cover)

    # The 202 status message in the feed caused a transient failure.
    # The exception caused a persistent failure.
    coverage_records = self._db.query(CoverageRecord).filter(
        CoverageRecord.operation == CoverageRecord.IMPORT_OPERATION,
        CoverageRecord.status != CoverageRecord.SUCCESS
    )
    eq_(
        sorted([CoverageRecord.TRANSIENT_FAILURE,
                CoverageRecord.PERSISTENT_FAILURE]),
        sorted([x.status for x in coverage_records])
    )

    identifier, ignore = Identifier.parse_urn(
        self._db, "urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441"
    )
    failure = CoverageRecord.lookup(
        identifier, data_source,
        operation=CoverageRecord.IMPORT_OPERATION
    )
    assert "Utter failure!" in failure.exception

def search_citations(self, publications):
    """Return an iterator over the publications that cite the
    publications in the `publications` list.
    """
    for publication in publications:
        eid = list(Identifier.find_by_type(publication.identifiers, 'SCOPUS'))
        if len(eid) == 0:
            continue
        eid = eid[0].value
        for pub in self.search_citations_by_eid(eid):
            yield pub

def coveragefailure_from_message(cls, data_source, message):
    """Turn a <simplified:message> tag into a CoverageFailure."""
    _db = Session.object_session(data_source)

    # First thing to do is determine which Identifier we're
    # talking about. If we can't do that, we can't create a
    # CoverageFailure object.
    urn = message.urn
    try:
        identifier, ignore = Identifier.parse_urn(_db, urn)
    except ValueError, e:
        identifier = None

def search_citations(self, publications):
    for publication in publications:
        eid = list(
            Identifier.find_by_type(publication.identifiers, 'SCOPUS'))
        if len(eid) == 0:
            continue
        eid = eid[0].value
        detail_url = list(
            URL.find_by_type(publication.source_urls, 'SCOPUS'))
        if len(detail_url) == 0:
            continue
        detail_url = detail_url[0].value
        for pub in self._get_citations_from_detail_url(detail_url, eid):
            yield pub

def items_that_need_coverage(self, identifiers=None, **kwargs):
    """Find all items lacking coverage from this CoverageProvider.

    Items should be Identifiers, though Editions should also work.

    By default, all identifiers of the `input_identifier_types` which
    don't already have coverage are chosen.
    """
    qu = Identifier.missing_coverage_from(
        self._db, self.input_identifier_types, self.output_source,
        count_as_missing_before=self.cutoff_time,
        operation=self.operation, **kwargs
    )
    if identifiers:
        qu = qu.filter(Identifier.id.in_([x.id for x in identifiers]))
    return qu

def assign_indexes(self, publications):
    pub_by_id = {}
    for pub in publications:
        e = list(Index.find_by_type(pub.indexes, 'WOS'))
        if len(e) > 0:
            continue
        ut = list(Identifier.find_by_type(pub.identifiers, 'WOK'))
        if len(ut) == 0:
            continue
        ut = ut[0].value
        pub_by_id[ut] = pub
    editions = self._find_editions(pub_by_id.keys())
    for ut, edition in editions.iteritems():
        pub_by_id[ut].indexes.append(Index(edition, type='WOS'))

def data_detail_for_feedparser_entry(cls, entry, data_source):
    """Turn an entry dictionary created by feedparser into dictionaries of
    data that can be used as keyword arguments to the Metadata and
    CirculationData constructors.

    :return: A 3-tuple (identifier, kwargs for Metadata constructor, failure)
    """
    identifier = entry.get('id')
    if not identifier:
        return None, None, None

    # At this point we can assume that we successfully got some
    # metadata, and possibly a link to the actual book.
    try:
        kwargs_meta = cls._data_detail_for_feedparser_entry(entry, data_source)
        return identifier, kwargs_meta, None
    except Exception, e:
        _db = Session.object_session(data_source)
        identifier_obj, ignore = Identifier.parse_urn(_db, identifier)
        failure = CoverageFailure(
            identifier_obj, traceback.format_exc(), data_source,
            transient=True
        )
        return identifier, None, failure

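# Minimal sketch of the per-entry error-isolation convention used above and
# in detail_for_elementtree_entry: every entry yields a 3-tuple of
# (identifier, parsed data, failure), so one bad entry becomes a recorded
# failure instead of aborting the whole feed. The parse callable and the
# entry dicts are placeholders, not the real feedparser structures.
import traceback

def detail_for_entry(entry, parse):
    identifier = entry.get('id')
    if not identifier:
        return None, None, None
    try:
        return identifier, parse(entry), None
    except Exception:
        return identifier, None, traceback.format_exc()

# detail_for_entry({'id': 'urn:x', 'title': 'T'}, lambda e: {'title': e['title']})
# -> ('urn:x', {'title': 'T'}, None)
# detail_for_entry({'id': 'urn:y'}, lambda e: e['missing'])
# -> ('urn:y', None, '<traceback text>')
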
import sys

from model import (
    DataSource,
    LicensePool,
    SessionManager,
    Work,
    Identifier,
)
from model import production_session

if __name__ == '__main__':
    session = production_session()
    data_source_name = sys.argv[1]
    identifier = sys.argv[2]
    data_source = DataSource.lookup(session, data_source_name)
    wid, ignore = Identifier.for_foreign_id(
        session, data_source.primary_identifier_type, identifier, False)
    pool = session.query(LicensePool).filter(
        LicensePool.data_source == data_source).filter(
            LicensePool.identifier == wid).one()
    primary_edition = pool.edition()
    old_work = primary_edition.work
    if old_work:
        old_work.license_pools.remove(pool)
    primary_edition.work = None
    pool.calculate_work()
    work = pool.work
    work.calculate_presentation()
    session.commit()

from nose.tools import set_trace
import os
import site
import sys
import datetime

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from integration.threem import (
    ThreeMAPI,
)
from integration.overdrive import (
    OverdriveAPI,
)
from model import (
    production_session,
    DataSource,
    Edition,
    Identifier,
)

if __name__ == '__main__':
    type, identifier_name = sys.argv[1:3]
    db = production_session()
    identifier, is_new = Identifier.for_foreign_id(db, type, identifier_name)
    if identifier.type == Identifier.THREEM_ID:
        source = DataSource.lookup(db, DataSource.THREEM)
        api = ThreeMAPI(db)
        edition, ignore = Edition.for_foreign_id(db, source, type, identifier_name)
        data = api.get_bibliographic_info_for([edition])

import os
import site
import sys
import datetime

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from integration.threem import (
    ThreeMAPI,
)
from integration.overdrive import (
    OverdriveAPI,
)
from model import (
    production_session,
    DataSource,
    Edition,
    Identifier,
)

if __name__ == '__main__':
    type, identifier_name = sys.argv[1:3]
    db = production_session()
    identifier, is_new = Identifier.for_foreign_id(db, type, identifier_name)
    if identifier.type == Identifier.THREEM_ID:
        source = DataSource.lookup(db, DataSource.THREEM)
        api = ThreeMAPI(db)
        edition, ignore = Edition.for_foreign_id(
            db, source, type, identifier_name)
        data = api.get_bibliographic_info_for([edition])

import os
import site
import sys
from nose.tools import set_trace

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from model import DataSource, LicensePool, SessionManager, Work, Identifier
from model import production_session

if __name__ == "__main__":
    session = production_session()
    data_source_name = sys.argv[1]
    identifier = sys.argv[2]
    data_source = DataSource.lookup(session, data_source_name)
    wid, ignore = Identifier.for_foreign_id(
        session, data_source.primary_identifier_type, identifier, False)
    pool = (
        session.query(LicensePool)
        .filter(LicensePool.data_source == data_source)
        .filter(LicensePool.identifier == wid)
        .one()
    )
    primary_edition = pool.edition()
    old_work = primary_edition.work
    if old_work:
        old_work.license_pools.remove(pool)
    primary_edition.work = None
    pool.calculate_work()
    work = pool.work
    work.calculate_presentation()
    session.commit()

def _identifier(self, identifier_type=Identifier.GUTENBERG_ID):
    id = self._str
    return Identifier.for_foreign_id(self._db, identifier_type, id)[0]

def test_recursively_equivalent_identifiers(self):
    # We start with a Gutenberg book.
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    record, ignore = Edition.for_foreign_id(
        self._db, gutenberg, Identifier.GUTENBERG_ID, "100")
    gutenberg_id = record.primary_identifier

    # We use OCLC Classify to do a title/author lookup.
    oclc = DataSource.lookup(self._db, DataSource.OCLC)
    search_id, ignore = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_WORK, "60010")
    gutenberg_id.equivalent_to(oclc, search_id, 1)

    # The title/author lookup associates the search term with two
    # different OCLC Numbers.
    oclc_id, ignore = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_NUMBER, "9999")
    oclc_id_2, ignore = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_NUMBER, "1000")
    search_id.equivalent_to(oclc, oclc_id, 1)
    search_id.equivalent_to(oclc, oclc_id_2, 1)

    # We then use OCLC Linked Data to connect one of the OCLC
    # Numbers with an ISBN.
    linked_data = DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)
    isbn_id, ignore = Identifier.for_foreign_id(
        self._db, Identifier.ISBN, "900100434X")
    oclc_id.equivalent_to(linked_data, isbn_id, 1)

    # As it turns out, we have an Overdrive work record...
    overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    overdrive_record, ignore = Edition.for_foreign_id(
        self._db, overdrive, Identifier.OVERDRIVE_ID, "{111-222}")
    overdrive_id = overdrive_record.primary_identifier

    # ...which is tied (by Overdrive) to the same ISBN.
    overdrive_id.equivalent_to(overdrive, isbn_id, 1)

    # Finally, here's a completely unrelated Edition, which
    # will not be showing up.
    gutenberg2, ignore = Edition.for_foreign_id(
        self._db, gutenberg, Identifier.GUTENBERG_ID, "200")
    gutenberg2.title = "Unrelated Gutenberg record."

    levels = [record.equivalent_identifiers(i) for i in range(0, 5)]

    # At level 0, the only identifier found is the Gutenberg ID.
    eq_(set([gutenberg_id]), set(levels[0]))

    # At level 1, we pick up the title/author lookup.
    eq_(set([gutenberg_id, search_id]), set(levels[1]))

    # At level 2, we pick up the title/author lookup and the two
    # OCLC Numbers.
    eq_(set([gutenberg_id, search_id, oclc_id, oclc_id_2]), set(levels[2]))

    # At level 3, we also pick up the ISBN.
    eq_(set([gutenberg_id, search_id, oclc_id, oclc_id_2, isbn_id]),
        set(levels[3]))

    # At level 4, the recursion starts to go in the other
    # direction: we pick up the Overdrive ID that's equivalent to
    # the same ISBN as the OCLC Number.
    eq_(
        set([gutenberg_id, search_id, oclc_id, oclc_id_2, isbn_id,
             overdrive_id]),
        set(levels[4]))

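# Self-contained sketch of what "equivalent identifiers at level N" means in
# the test above: each level expands the frontier one hop further through
# the equivalency graph. Plain strings and an adjacency dict stand in for
# Identifier records and Equivalency rows.
def equivalent_identifiers_sketch(graph, start, levels):
    found = set([start])
    for _ in range(levels):
        frontier = set()
        for node in found:
            frontier.update(graph.get(node, []))
        found.update(frontier)
    return found

sample_graph = {
    'gutenberg': ['search'],
    'search': ['gutenberg', 'oclc-1', 'oclc-2'],
    'oclc-1': ['search', 'isbn'],
    'oclc-2': ['search'],
    'isbn': ['oclc-1', 'overdrive'],
    'overdrive': ['isbn'],
}
# equivalent_identifiers_sketch(sample_graph, 'gutenberg', 0) -> {'gutenberg'}
# equivalent_identifiers_sketch(sample_graph, 'gutenberg', 4) -> all six identifiers
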
def do_run(self, _db):
    identifier = Identifier(type='Keep It', identifier='100')
    _db.add(identifier)

def do_run(self, _db):
    identifier = Identifier(type='You Can', identifier='Keep It')
    _db.add(identifier)
    raise RuntimeError

def extract_feed_data(self, feed, feed_url=None):
    """Turn an OPDS feed into a dictionary of Metadata objects (with any
    CirculationData attached) and a dictionary of CoverageFailures, both
    keyed by identifier URN.
    """
    # This is one of these cases where we want to create a
    # DataSource if it doesn't already exist. This way you don't have
    # to predefine a DataSource for every source of OPDS feeds.
    data_source = self.data_source
    fp_metadata, fp_failures = self.extract_data_from_feedparser(
        feed=feed, data_source=data_source)
    # gets: medium, measurements, links, contributors, etc.
    xml_data_meta, xml_failures = self.extract_metadata_from_elementtree(
        feed, data_source=data_source, feed_url=feed_url
    )

    # Translate the id in failures to identifier.urn.
    identified_failures = {}
    for id, failure in fp_failures.items() + xml_failures.items():
        external_identifier, ignore = Identifier.parse_urn(self._db, id)
        if self.identifier_mapping:
            internal_identifier = self.identifier_mapping.get(
                external_identifier, external_identifier)
        else:
            internal_identifier = external_identifier
        identified_failures[internal_identifier.urn] = failure

    # Use one loop for both, since the id will be the same for both
    # dictionaries.
    metadata = {}
    circulationdata = {}
    for id, m_data_dict in fp_metadata.items():
        external_identifier, ignore = Identifier.parse_urn(self._db, id)
        if self.identifier_mapping:
            internal_identifier = self.identifier_mapping.get(
                external_identifier, external_identifier)
        else:
            internal_identifier = external_identifier

        # Don't process this item if there was already an error.
        if internal_identifier.urn in identified_failures.keys():
            continue

        identifier_obj = IdentifierData(
            type=internal_identifier.type,
            identifier=internal_identifier.identifier
        )

        # Form the Metadata object.
        xml_data_dict = xml_data_meta.get(id, {})
        combined_meta = self.combine(m_data_dict, xml_data_dict)
        if combined_meta.get('data_source') is None:
            combined_meta['data_source'] = self.data_source_name
        combined_meta['primary_identifier'] = identifier_obj
        metadata[internal_identifier.urn] = Metadata(**combined_meta)

        # Form the CirculationData that would correspond to this Metadata.
        c_circulation_dict = m_data_dict.get('circulation')
        xml_circulation_dict = xml_data_dict.get('circulation', {})
        c_data_dict = self.combine(c_circulation_dict, xml_circulation_dict)

        if c_data_dict:
            circ_links_dict = {}
            # Extract just the links to pass to the CirculationData
            # constructor.
            if 'links' in xml_data_dict:
                circ_links_dict['links'] = xml_data_dict['links']
            combined_circ = self.combine(c_data_dict, circ_links_dict)
            if combined_circ.get('data_source') is None:
                combined_circ['data_source'] = self.data_source_name
            combined_circ['primary_identifier'] = identifier_obj
            circulation = CirculationData(**combined_circ)

            if circulation.formats:
                metadata[internal_identifier.urn].circulation = circulation
            else:
                # If the CirculationData has no formats, it
                # doesn't really offer any way to actually get the
                # book, and we don't want to create a
                # LicensePool. All the circulation data is
                # useless.
                #
                # TODO: This will need to be revisited when we add
                # ODL support.
                metadata[internal_identifier.urn].circulation = None
    return metadata, identified_failures

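# Minimal sketch of the identifier-mapping step above: an optional mapping
# translates an external Identifier into the internal one that should be
# used, falling back to the external value when no mapping entry exists.
# Plain strings stand in for Identifier objects.
def to_internal(external_identifier, identifier_mapping=None):
    if identifier_mapping:
        return identifier_mapping.get(
            external_identifier, external_identifier)
    return external_identifier

# to_internal('isbn:123', {'isbn:123': 'overdrive:abc'}) -> 'overdrive:abc'
# to_internal('isbn:999', {'isbn:123': 'overdrive:abc'}) -> 'isbn:999'
# to_internal('isbn:999', None) -> 'isbn:999'
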
def check_for_new_data(self, feed):
    """Check if the feed contains any entries that haven't been imported
    yet. If force_reimport is set, every entry in the feed is treated
    as new.
    """
    # If force_reimport is set, we don't even need to check. Always
    # treat the feed as though it contained new data.
    if self.force_reimport:
        return True

    last_update_dates = self.importer.extract_last_update_dates(feed)

    new_data = False
    for identifier, remote_updated in last_update_dates:
        identifier, ignore = Identifier.parse_urn(self._db, identifier)
        data_source = self.importer.data_source

        record = None
        if identifier:
            record = CoverageRecord.lookup(
                identifier, data_source,
                operation=CoverageRecord.IMPORT_OPERATION
            )

        # If there was a transient failure last time we tried to
        # import this book, try again regardless of whether the
        # feed has changed.
        if record and record.status == CoverageRecord.TRANSIENT_FAILURE:
            new_data = True
            self.log.info(
                "Counting %s as new because previous attempt resulted in transient failure: %s",
                record.identifier, record.exception
            )
            break

        # If our last attempt was a success or a persistent
        # failure, we only want to import again if something
        # changed since then.
        if record and record.timestamp:
            # We've imported this entry before, so don't import it
            # again unless it's changed.
            if not remote_updated:
                # The remote isn't telling us whether the entry
                # has been updated. Import it again to be safe.
                new_data = True
                self.log.info(
                    "Counting %s as new because remote has no information about when it was updated.",
                    record.identifier
                )
                break

            if remote_updated >= record.timestamp:
                # This book has been updated.
                self.log.info(
                    "Counting %s as new because its coverage date is %s and remote has %s.",
                    record.identifier, record.timestamp, remote_updated
                )
                new_data = True
                break
        else:
            # There's no record of an attempt to import this book.
            self.log.info(
                "Counting %s as new because it has no CoverageRecord.",
                identifier
            )
            new_data = True
            break
    return new_data

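# Self-contained sketch of the "is this entry new?" decision above, reduced
# to a pure function over the pieces that matter: the coverage record's
# status and timestamp, and the remote update time. Plain strings stand in
# for CoverageRecord objects and datetimes.
TRANSIENT_FAILURE = 'transient failure'

def entry_is_new(record_status, record_timestamp, remote_updated):
    if record_status is None and record_timestamp is None:
        return True                     # never attempted before
    if record_status == TRANSIENT_FAILURE:
        return True                     # retry after a transient failure
    if record_timestamp:
        if not remote_updated:
            return True                 # remote gives no update info; be safe
        return remote_updated >= record_timestamp
    return True                         # a record with no timestamp

# entry_is_new(None, None, '2015-01-01') -> True
# entry_is_new('success', '2016-01-01', '2015-01-01') -> False
# entry_is_new(TRANSIENT_FAILURE, '2016-01-01', None) -> True
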