def log_archiving_paused(num_links, idx, timestamp):
    """Record the pause time and print instructions for resuming the run.

    Stores the current time in ``_LAST_RUN_STATS['end_ts']``, then prints a
    "paused" banner followed by the archive location and the command line
    that continues from the given link.

    Args:
        num_links: Value shown as the total in the ``(idx/total)`` counter.
        idx: Position of the link that was being processed; it is shown
            incremented by one (presumably a zero-based index -- confirm
            against the caller).
        timestamp: Identifier of the link the run stopped on; printed in the
            banner and as the argument of the suggested resume command.
    """
    paused_at = datetime.now()
    _LAST_RUN_STATS['end_ts'] = paused_at

    # Blank line first so the banner is visually separated from prior output.
    print()
    banner_template = (
        '\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'
    )
    print(banner_template.format(
        **ANSI,
        now=paused_at.strftime('%Y-%m-%d %H:%M:%S'),
        idx=idx + 1,
        timestamp=timestamp,
        total=num_links,
    ))

    # Show the output folder with the REPO_DIR prefix stripped.
    shown_output_dir = OUTPUT_DIR.replace(REPO_DIR + '/', '')
    print(' To view your archive, open: {}/index.html'.format(shown_output_dir))

    print(' Continue where you left off by running:')
    script_name = pretty_path(sys.argv[0])
    print(' {} {}'.format(script_name, timestamp))
def update_archive(archive_path, links, source=None, resume=None, append=True):
    """update or create index.html+json given a path to an export file containing new links

    Prints a start banner, hands the links to ``archive_links`` and then
    prints a timing line plus the counters held in ``_RESULTS_TOTALS``.

    Args:
        archive_path: Forwarded unchanged to ``archive_links``.
        links: Sized collection of links; ``len(links)`` is used in the
            banners and the collection is forwarded to ``archive_links``.
        source: Forwarded unchanged to ``archive_links``.
        resume: When truthy, shown in the "Resuming" banner and forwarded to
            ``archive_links``; otherwise the "Updating" banner is printed.
        append: Not read by this function; kept so existing callers that
            pass it keep working.
    """
    # Sample the clock once so the printed start time and the start of the
    # measured duration are the same instant.
    started_at = datetime.now()
    started_label = started_at.strftime('%Y-%m-%d %H:%M:%S')

    if resume:
        print('{green}[▶] [{}] Resuming archive downloading from {}...{reset}'.format(
            started_label,
            resume,
            **ANSI,
        ))
    else:
        print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format(
            started_label,
            len(links),
            **ANSI,
        ))

    # loop over links and archive them
    archive_links(archive_path, links, source=source, resume=resume)

    # print timing information & summary
    finished_at = datetime.now()
    seconds = finished_at.timestamp() - started_at.timestamp()
    if seconds > 60:
        duration = '{0:.2f} min'.format(seconds / 60)
    else:
        duration = '{0:.2f} sec'.format(seconds)

    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
        ANSI['green'],
        finished_at.strftime('%Y-%m-%d %H:%M:%S'),
        len(links),
        duration,
        ANSI['reset'],
    ))
    print(' - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
    # NOTE: 'succeded' is the key spelling this code reads from
    # _RESULTS_TOTALS; it must match wherever that dict is populated.
    print(' - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
    print(' - {} errors'.format(_RESULTS_TOTALS['failed']))
    # Show the output folder with the REPO_DIR prefix stripped.
    print(' To view your archive, open: {}/index.html'.format(
        OUTPUT_DIR.replace(REPO_DIR + '/', '')))
def log_archiving_finished(num_links):
    """Record the end time of the run and print the completion summary.

    Stores the current time in ``_LAST_RUN_STATS['end_ts']``, computes the
    elapsed time from ``_LAST_RUN_STATS['start_ts']`` and prints it together
    with the skipped / succeeded / failed counters from ``_LAST_RUN_STATS``.

    Args:
        num_links: Number shown as the page count in the completion line.
    """
    end_ts = datetime.now()
    _LAST_RUN_STATS['end_ts'] = end_ts

    # start_ts must expose .timestamp(); presumably a datetime stored when
    # the run began -- confirm against the code that sets it.
    seconds = end_ts.timestamp() - _LAST_RUN_STATS['start_ts'].timestamp()

    # Report minutes for anything longer than a minute, otherwise seconds.
    if seconds > 60:
        duration = '{0:.2f} min'.format(seconds / 60)
    else:
        duration = '{0:.2f} sec'.format(seconds)

    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
        ANSI['green'],
        end_ts.strftime('%Y-%m-%d %H:%M:%S'),
        num_links,
        duration,
        ANSI['reset'],
    ))
    print(' - {} links skipped'.format(_LAST_RUN_STATS['skipped']))
    print(' - {} links updated'.format(_LAST_RUN_STATS['succeeded']))
    print(' - {} links had errors'.format(_LAST_RUN_STATS['failed']))
    # Show the output folder with the REPO_DIR prefix stripped.
    print(' To view your archive, open: {}/index.html'.format(
        OUTPUT_DIR.replace(REPO_DIR + '/', '')))
def update_archive(archive_path, links, source=None, resume=None, append=True):
    """update or create index.html+json given a path to an export file containing new links

    Validates the links, archives each one selected by
    ``links_after_timestamp(links, resume)`` into its own folder under
    ``ARCHIVE_DIR``, and prints a summary. If anything interrupts the loop,
    a "paused" message with a resume command is printed first.

    Args:
        archive_path: Not read by this function; kept for caller
            compatibility.
        links: Sized collection of link mappings; each archived link must
            provide a ``'timestamp'`` key.
        source: Not read by this function; kept for caller compatibility.
        resume: When truthy, shown in the "Resuming" banner; always passed to
            ``links_after_timestamp`` to select the links to process.
        append: Not read by this function; kept for caller compatibility.

    Raises:
        SystemExit: With code 1 when the run is stopped by
            ``KeyboardInterrupt``.
        Exception: Any other exception (including ``SystemExit``) raised by
            the dependency check or while archiving is re-raised after the
            "paused" message is printed.
    """
    # Sample the clock once so the printed start time and the start of the
    # measured duration are the same instant.
    started_at = datetime.now()
    started_label = started_at.strftime('%Y-%m-%d %H:%M:%S')

    if resume:
        print('{green}[▶] [{}] Resuming archive downloading from {}...{reset}'.format(
            started_label,
            resume,
            **ANSI,
        ))
    else:
        print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format(
            started_label,
            len(links),
            **ANSI,
        ))

    check_links_structure(links)

    # prefetch the first link off the generator so that if we pause or fail
    # immediately we can show that we paused on the first link and not just None
    to_archive = Peekable(links_after_timestamp(links, resume))
    idx, link = 0, to_archive.peek(0)

    # loop over links and archive them
    try:
        check_dependencies()
        for idx, link in enumerate(to_archive):
            link_dir = os.path.join(ARCHIVE_DIR, link['timestamp'])
            archive_link(link_dir, link)

    except (KeyboardInterrupt, SystemExit, Exception) as e:
        # NOTE(review): if peek(0) returned nothing (no links to process) and
        # check_dependencies() raised, link['timestamp'] below would itself
        # fail -- confirm Peekable.peek semantics for an empty iterator.
        print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
            **ANSI,
            now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            idx=idx + 1,
            timestamp=link['timestamp'],
            total=len(links),
        ))
        print(' To view your archive, open: {}/index.html'.format(
            OUTPUT_DIR.replace(REPO_DIR + '/', '')))
        print(' Continue where you left off by running:')
        print(' {} {}'.format(
            pretty_path(sys.argv[0]),
            link['timestamp'],
        ))
        if not isinstance(e, KeyboardInterrupt):
            print()
            # Bare raise re-raises the active exception without adding an
            # extra traceback entry for this line.
            raise
        # Ctrl-C is a deliberate pause: exit with status 1 instead of
        # propagating KeyboardInterrupt.
        raise SystemExit(1)

    # print timing information & summary
    finished_at = datetime.now()
    seconds = finished_at.timestamp() - started_at.timestamp()
    if seconds > 60:
        duration = '{0:.2f} min'.format(seconds / 60)
    else:
        duration = '{0:.2f} sec'.format(seconds)

    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
        ANSI['green'],
        finished_at.strftime('%Y-%m-%d %H:%M:%S'),
        len(links),
        duration,
        ANSI['reset'],
    ))
    print(' - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
    # NOTE: 'succeded' is the key spelling this code reads from
    # _RESULTS_TOTALS; it must match wherever that dict is populated.
    print(' - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
    print(' - {} errors'.format(_RESULTS_TOTALS['failed']))
    # Show the output folder with the REPO_DIR prefix stripped.
    print(' To view your archive, open: {}/index.html'.format(
        OUTPUT_DIR.replace(REPO_DIR + '/', '')))