class Command(LoggingCommand):
    help = "Load holdings records after title records are all loaded"  # NOQA: A003
    args = '<location of holdings directory>'

    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        default_location = bib_in_settings + '/holdings'
    else:
        default_location = None

    def handle(self, holdings_source=default_location, *args, **options):
        # Guard against a missing source: default_location may be None when
        # no bib directory is configured in settings.
        if holdings_source is None or not os.path.exists(holdings_source):
            LOGGER.error("There is no valid holdings source folder defined.")
            set_holdings = [
                'To load holdings, add a folder called "holdings"',
                'to the bib directory that is set in settings,',
                'or pass the location of holdings as an argument to the loader.',
            ]
            LOGGER.error(' '.join(set_holdings))
            return

        # First we want to make sure that our material types are up to date.
        for material_type in models.MaterialType.objects.all():
            material_type.delete()
        management.call_command('loaddata', 'material_types.json')

        holding_loader = HoldingLoader()
        holding_loader.main(holdings_source)
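# A minimal invocation sketch for the command above. Assumptions: it is
# registered as "load_holdings" (the title_sync variants below call it by
# that name) and Django settings are already configured. The path is
# illustrative; omit it to fall back to <bib dir>/holdings.
from django.core.management import call_command

call_command('load_holdings', '/path/to/bib/holdings')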
def handle(self, *args, **options):
    start = datetime.now()
    _logger.info("Starting title sync process.")

    # Only load titles if BIB_STORAGE is there, which is not always the
    # case for folks in the open source world.
    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        worldcat_dir = bib_in_settings + '/worldcat_titles/'

        pull_titles = bool(options['pull_title_updates'] and
                           hasattr(settings, "WORLDCAT_KEY"))
        if pull_titles:
            call_command('pull_titles')

        _logger.info("Starting load of OCLC titles.")
        bulk_dir = worldcat_dir + 'bulk'
        if os.path.isdir(bulk_dir):
            call_command('load_titles', bulk_dir, skip_index=True)

        tnu = self.find_titles_not_updated()

        # Only update by individual lccn if there are records that need updating.
        if pull_titles and len(tnu):
            _logger.info("Pulling titles from OCLC by individual lccn & oclc num.")
            self.pull_lccn_updates(tnu)
            _logger.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

    tnu = self.find_titles_not_updated(limited=False)
    _logger.info("Running pre-deletion checks for these titles.")

    if bib_in_settings:
        if len(tnu):
            # Delete titles that haven't been updated and have no issues attached.
            for title in tnu:
                issues = title.issues.all()
                error = "DELETION ERROR: Title %s has " % title
                error_end = "It will not be deleted."
                if issues:
                    _logger.warning(error + 'issues.' + error_end)
                    continue
                delete_txt = (title.name, title.lccn, title.oclc)
                _logger.info('TITLE DELETED: %s, lccn: %s, oclc: %s' % delete_txt)
                title.delete()

        # Load holdings for all remaining titles.
        call_command('load_holdings')

    # Overlay place info harvested from dbpedia onto the places table.
    try:
        self.load_place_links()
    except Exception as e:
        _logger.exception(e)
def handle(self, **options):
    if not (models.Title.objects.all().count() == 0 and
            models.Holding.objects.all().count() == 0 and
            models.Essay.objects.all().count() == 0 and
            models.Batch.objects.all().count() == 0 and
            models.Issue.objects.all().count() == 0 and
            models.Page.objects.all().count() == 0 and
            index.page_count() == 0 and
            index.title_count() == 0):
        _logger.warning("Database or index not empty as expected.")
        return

    start = datetime.now()

    management.call_command('loaddata', 'languages.json')
    management.call_command('loaddata', 'institutions.json')
    management.call_command('loaddata', 'ethnicities.json')
    management.call_command('loaddata', 'labor_presses.json')
    management.call_command('loaddata', 'countries.json')

    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        # Look in BIB_STORAGE for original titles to load.
        for filename in os.listdir(bib_in_settings):
            if filename.startswith('titles-') and filename.endswith('.xml'):
                filepath = os.path.join(bib_in_settings, filename)
                management.call_command('load_titles', filepath, skip_index=True)

    management.call_command('title_sync',
                            skip_essays=options['skip_essays'],
                            pull_title_updates=options['pull_title_updates'])

    end = datetime.now()
    total_time = end - start
    _logger.info('start time: %s' % start)
    _logger.info('end time: %s' % end)
    _logger.info('total time: %s' % total_time)
    _logger.info("chronam_sync done.")
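# A minimal invocation sketch for the command above. Assumption: it is
# registered as "chronam_sync", per its final log line. Both options are
# read unconditionally from `options`, so they are passed explicitly here.
from django.core.management import call_command

call_command('chronam_sync', skip_essays=False, pull_title_updates=False)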
def handle(self, *args, **options):
    start = datetime.now()
    LOGGER.info("Starting title sync process.")

    # Only load titles if BIB_STORAGE is there, which is not always the
    # case for folks in the open source world.
    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        worldcat_dir = bib_in_settings + '/worldcat_titles/'

        pull_titles = bool(options['pull_title_updates'] and
                           hasattr(settings, "WORLDCAT_KEY"))
        if pull_titles:
            call_command('pull_titles')

        LOGGER.info("Starting load of OCLC titles.")
        bulk_dir = worldcat_dir + 'bulk'
        if os.path.isdir(bulk_dir):
            call_command('load_titles', bulk_dir, skip_index=True)

        tnu = self.find_titles_not_updated()

        # Only update by individual lccn if there are records that need updating.
        if pull_titles and len(tnu):
            LOGGER.info("Pulling titles from OCLC by individual lccn & oclc num.")
            self.pull_lccn_updates(tnu)
            LOGGER.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

    tnu = self.find_titles_not_updated(limited=False)
    LOGGER.info("Running pre-deletion checks for these titles.")

    # Make sure that our essays are up to date.
    if not options['skip_essays']:
        load_essays(settings.ESSAYS_FEED)

    if bib_in_settings:
        if len(tnu):
            # Delete titles that haven't been updated and have neither
            # essays nor issues attached.
            for title in tnu:
                essays = title.essays.all()
                issues = title.issues.all()
                error = "DELETION ERROR: Title %s has " % title
                error_end = "It will not be deleted."
                if not essays and not issues:
                    delete_txt = (title.name, title.lccn, title.oclc)
                    LOGGER.info('TITLE DELETED: %s, lccn: %s, oclc: %s' % delete_txt)
                    title.delete()
                elif essays:
                    LOGGER.warning(error + 'essays.' + error_end)
                elif issues:
                    LOGGER.warning(error + 'issues.' + error_end)

        # Load holdings for all remaining titles.
        call_command('load_holdings')

    # Overlay place info harvested from dbpedia onto the places table.
    try:
        self.load_place_links()
    except Exception as e:
        LOGGER.exception(e)

    index.index_titles()

    # Time of full process run.
    end = datetime.now()
    total_time = end - start
    LOGGER.info('start time: %s' % start)
    LOGGER.info('end time: %s' % end)
    LOGGER.info('total time: %s' % total_time)
    LOGGER.info("title_sync done.")
def handle(self, *args, **options):
    start = datetime.now()
    _logger.info("Starting title sync process.")

    # Only load titles if BIB_STORAGE is there, which is not always the
    # case for folks in the open source world.
    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        worldcat_dir = bib_in_settings + '/worldcat_titles/'

        pull_titles = bool(options['pull_title_updates'] and
                           hasattr(settings, "WORLDCAT_KEY"))
        if pull_titles:
            call_command('pull_titles')

        _logger.info("Starting load of OCLC titles.")
        bulk_dir = worldcat_dir + 'bulk'
        if os.path.isdir(bulk_dir):
            call_command('load_titles', bulk_dir, skip_index=True)

        tnu = self.find_titles_not_updated()

        # Only update by individual lccn if there are records that need updating.
        if pull_titles and len(tnu):
            _logger.info("Pulling titles from OCLC by individual lccn & oclc num.")
            self.pull_lccn_updates(tnu)
            _logger.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

    tnu = self.find_titles_not_updated(limited=False)
    _logger.info("Running pre-deletion checks for these titles.")

    # Make sure that our essays are up to date.
    if not options['skip_essays']:
        load_essays(settings.ESSAYS_FEED)

    if bib_in_settings:
        if len(tnu):
            # Delete titles that haven't been updated and have neither
            # essays nor issues attached.
            for title in tnu:
                essays = title.essays.all()
                issues = title.issues.all()
                error = "DELETION ERROR: Title %s has " % title
                error_end = "It will not be deleted."
                if not essays and not issues:
                    delete_txt = (title.name, title.lccn, title.oclc)
                    _logger.info('TITLE DELETED: %s, lccn: %s, oclc: %s' % delete_txt)
                    title.delete()
                elif essays:
                    _logger.warning(error + 'essays.' + error_end)
                elif issues:
                    _logger.warning(error + 'issues.' + error_end)

        # Load holdings for all remaining titles.
        call_command('load_holdings')

    # Overlay place info harvested from dbpedia onto the places table.
    try:
        self.load_place_links()
    except Exception as e:
        _logger.exception(e)
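# A minimal invocation sketch for the title_sync variants above.
# Assumption: the command is registered as "title_sync", per the final log
# line of the fuller variant. pull_title_updates only triggers an OCLC
# pull when settings.WORLDCAT_KEY is also defined.
from django.core.management import call_command

call_command('title_sync', skip_essays=False, pull_title_updates=True)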