def main(argv):
    """Program entry point.

    Stores argv for the rest of the program, runs registered init hooks,
    kicks off the starter actor, blocks until the actor system drains,
    then runs the finalizer hooks.

    Returns:
        0, the process exit status.
    """
    cc.statics.argv = argv
    cc.inits.run_inits()
    cc.actor.starter.starter()
    # Block until all actor messages have been processed.
    cc.actor.join()
    cc.inits.run_finals()
    logging.info('main: exit')
    return 0
def _starter_helper(sub_feed, is_stashing):
    """Dispatch every episode of *sub_feed*.

    Each episode is either pickled into the shared stash file
    (is_stashing=True) or handed to a downloader actor.
    """
    logging.info("starter_helper: sub_feed.url=%s", sub_feed.url)

    def _dispatch(episode):
        # One episode: stash it or download it.
        if is_stashing:
            # The pickle file is shared across threads; serialize writes.
            with cc.statics.pickle_lock:
                pickle.dump(episode, cc.statics.pickle_file)
        else:
            cc.actor.downloader.downloader(episode)

    for episode in cc.episode.Episode.make_episodes(sub_feed):
        _dispatch(episode)
def thread_main():
    """Worker-thread body: pump messages from the shared queue forever.

    Each message is a unit of work exposing process(); exceptions from a
    message are logged and swallowed so one bad message cannot kill the
    worker. task_done() is always called so queue.join() can complete.
    """
    thread_name = threading.current_thread().name
    logging.info('%s: start', thread_name)
    while True:
        # Blocks until a message is available.
        message = cc.statics.message_queue.get()
        # NOTE: trace is a project-added logging level, not stdlib.
        logging.trace('%s: %s', thread_name, message)
        try:
            message.process()
        except Exception:
            # Keep the worker alive; record the failing message.
            logging.exception('%s: %s', thread_name, message)
        finally:
            cc.statics.message_queue.task_done()
    # Deliberately unreachable sentinel: the loop above never exits.
    logging.error('%s: exit (impossible!)', thread_name)
def _starter(show_url, is_unstashing, start, end, step, is_stashing=False):
    """Start downloads for one show.

    If is_unstashing, replay previously stashed episodes: unpickle them
    from the shared stash file until EOF and hand each to a downloader
    actor. Otherwise build the feed from show_url, split it into
    sub-feeds over the (start, end, step) date range, and dispatch each
    sub-feed through _starter_helper.

    Args:
        show_url: URL of the show to build the feed from.
        is_unstashing: replay the stash file instead of fetching the feed.
        start, end, step: date-range selection forwarded to
            feed.replace_date_range().
        is_stashing: forwarded to _starter_helper; True pickles episodes
            instead of downloading them. Defaults to False so existing
            callers are unaffected.
    """
    logging.info("starter: show_url=%s", show_url)
    if is_unstashing:
        # Grab the shared file handle under the lock, then drain it.
        with cc.statics.pickle_lock:
            pickle_file = cc.statics.pickle_file
        try:
            while True:
                episode = pickle.load(pickle_file)
                cc.actor.downloader.downloader(episode)
        except EOFError:
            pass  # end of stash: all episodes replayed
        return
    feed = cc.feed.Feed.from_show_url(show_url)
    logging.debug(
        "feed..."
        "\n url=%s"
        "\n videos_url=%s"
        "\n start=%s"
        "\n end=%s",
        feed.url, feed.videos_url, feed.start, feed.end,
    )
    for sub_feed in feed.replace_date_range(start, end, step):
        # BUG FIX: original called the undefined name `starter_helper`
        # and dropped the required is_stashing argument; the helper is
        # `_starter_helper(sub_feed, is_stashing)`.
        _starter_helper(sub_feed, is_stashing)
def _downloader(episode, output_dir_path, simulate, salvage_dirs):
    """Download one episode into its own directory.

    Videos are downloaded into a fresh temporary directory that is only
    renamed to its final name by the success callback, so a partially
    downloaded episode is never mistaken for a complete one. If the final
    directory already exists, the episode is skipped.

    Args:
        episode: episode object (.url, .date, .dir_name, .videos).
        output_dir_path: directory that receives the episode directory.
        simulate: dry run; no directories are created or renamed.
        salvage_dirs: directories searched for previously downloaded
            files — presumably to avoid re-downloading; confirm in
            cc.salvage.
    """
    logging.info('downloader: episode .date=%s .url=%s',
                 episode.date, episode.url)
    logging.trace('downloader: episode...'
                  '\n url=%s'
                  '\n dir_name=%s'
                  '\n date=%s'
                  '\n videos=\n%s',
                  episode.url, episode.dir_name, episode.date,
                  cc.pformat.PrettyFormatter(episode.videos))
    # Check/make paths.
    dir_path = os.path.join(output_dir_path, episode.dir_name)
    if os.path.exists(dir_path):
        # Final directory already present: episode was completed before.
        logging.info('downloader: skip: dir_path=%s', dir_path)
        return
    salvage = cc.salvage.Salvage(episode.dir_name, salvage_dirs)
    if simulate:
        # Dry run: fabricate a placeholder name, touch nothing on disk.
        tmp_dir_path = os.path.join(
            output_dir_path, 'tmpXXXXXXXX-' + episode.dir_name)
    else:
        tmp_dir_path = tempfile.mkdtemp(
            suffix='-'+episode.dir_name, dir=output_dir_path)
    logging.debug('downloader: tmp_dir_path=%s', tmp_dir_path)
    # Construct actors.
    # The counter invokes _downloader_success once all per-video
    # downloads report in, or _downloader_failed if any one fails.
    counter = cc.actor.counter.Counter(
        functools.partial(_downloader_success,
                          episode.url, tmp_dir_path, dir_path, simulate),
        functools.partial(_downloader_failed, episode.url))
    dlers = _make_dlers(episode, tmp_dir_path, counter, simulate, salvage)
    counter.count = len(dlers)
    # Start actors.
    for dler in dlers:
        dler()
def _downloader_success(episode_url, tmp_dir_path, dir_path, simulate): logging.debug('downloader: %s -> %s', tmp_dir_path, dir_path) if not simulate: os.rename(tmp_dir_path, dir_path) logging.info('downloader: success: episode.url=%s', episode_url)
def final_stash():
    """Finalizer hook: close the shared pickle (stash) file.

    A stash file is only open when --stash or --unstash was given;
    otherwise there is nothing to close.
    """
    args = cc.statics.args
    if args.stash is None and args.unstash is None:
        return  # no stash file was ever opened
    pickle_file = cc.statics.pickle_file
    logging.info("final_stash: close %s", pickle_file.name)
    pickle_file.close()
def _download(url, file_name, cwd, prog, download_timeout, monitor_period,
              cpu_bound, memory_bound, partial_okay):
    """Download *url* to cwd/file_name via an external program (*prog*),
    with resource monitoring and exponential-backoff retries.

    The transfer is written to file_name + '.part' and renamed to
    file_name only after it completes, so readers never see a partial
    file under the final name. While the subprocess runs it is polled
    every monitor_period seconds and killed when it exceeds cpu_bound /
    memory_bound (percent) or when the download_timeout deadline passes.
    An rtmpdump "incomplete" exit triggers a resume/retry loop, since
    rtmpdump can resume a partial transfer.

    Raises:
        cc.Error: if the program exits non-zero and the retry logic does
            not recover the transfer.
    """
    cwd = cwd or os.getcwd()
    file_name_part = file_name + '.part'
    output_path = os.path.join(cwd, file_name)
    output_path_part = os.path.join(cwd, file_name_part)
    # sha1 of the partial file from the previous attempt; lets us detect
    # "no progress" between retries.
    digest = None
    for retry_exp in itertools.count():
        # The timer callback is a no-op; only timer.finished is used, as a
        # deadline flag polled by the monitor loop below.
        timer = threading.Timer(download_timeout, lambda: None)
        timer.daemon = True
        # NOTE(review): proc looks like a psutil.Popen wrapper (wait
        # raises psutil.TimeoutExpired) — confirm in _make_subprocess.
        proc = _make_subprocess(url, file_name_part, cwd, prog)
        timer.start()
        ret = -1
        # Monitor loop: wake every monitor_period seconds and check the
        # subprocess against the resource and deadline limits.
        while True:
            try:
                ret = proc.wait(timeout=monitor_period)
                break
            except psutil.TimeoutExpired:
                pass  # still running; fall through to the checks
            cpu_percent = proc.get_cpu_percent(interval=None)
            memory_percent = proc.get_memory_percent()
            logging.trace('rtmp: pid=%d cpu=%.1f memory=%.1f',
                          proc.pid, cpu_percent, memory_percent)
            if cpu_percent > cpu_bound:
                logging.error('rtmp: cpu limit exceeded')
                proc.kill()
                break
            if memory_percent > memory_bound:
                logging.error('rtmp: memory limit exceeded')
                proc.kill()
                break
            if timer.finished.is_set():
                logging.error('rtmp: timeout: %s -> %s',
                              url, output_path_part)
                proc.kill()
                break
        timer.cancel()
        if prog == 'rtmpdump' and ret == RTMPDUMP_INCOMPLETE:
            if partial_okay:
                # Caller accepts partial files: treat as success.
                logging.warning(
                    'rtmp: partial download %s to %s', url, file_name)
                ret = 0
                break
            with open(output_path_part, 'rb') as output_file:
                new_digest = hashlib.sha1(output_file.read()).digest()
            if digest is not None and digest == new_digest:
                # We made no progress; the download might be completed.
                # Let's not retry and assume it was.
                logging.warning(
                    'rtmp: no progress: url=%s file_name=%s',
                    url, file_name)
                ret = 0
                break
            digest = new_digest
            # rtmpdump didn't complete the transfer; resume might get further.
            retry = 2 ** retry_exp
            if retry > download_timeout:
                # Backoff outgrew the download timeout: stop retrying and
                # fall through to the error check (ret is still non-zero).
                logging.error('rtmp: retry timeout: %s -> %s',
                              url, output_path_part)
            else:
                logging.trace('rtmp: retry=%d url=%s', retry, url)
                time.sleep(retry)
                continue
        if ret is not None and ret != 0:
            raise cc.Error('Could not download (ret=%s): %s' % (ret, url))
        # Okay, we are done.
        break
    # Publish the completed file under its final name.
    os.rename(output_path_part, output_path)
    logging.info('rtmp: success: %s -> %s', url, output_path)