def _get_url(url):
    logging.debug('get_url: url=%s', url)
    response = requests.get(url, timeout=60)
    if logging.is_enabled_for(logging.TRACE):
        for header, value in response.headers.items():
            logging.trace('get_url: %s: %s', header, value)
    response.raise_for_status()
    return response
def _dler(dl, url, dir_path, fne, ext, counter):
    logging.debug(
        'downloader: %s -> %s', url, os.path.join(dir_path, fne + ext))
    try:
        dl(url, dir_path, fne, ext)
    except:
        # Any failure aborts the whole episode; re-raise after cancelling.
        counter.cancel()
        raise
    else:
        counter.countdown()
def _make_dlers(episode, tmp_dir_path, counter, simulate, salvage):
    dlers = []
    for dl, url, fne, ext in _get_dls(episode):
        # Prefer a previously downloaded copy over downloading again.
        src_path = salvage.find(episode.dir_name, fne + ext)
        if src_path is not None:
            logging.debug('downloader: salvage: %s', src_path)
            dl, url = _dl_copy, src_path
        if simulate:
            dl = _dl_none
        dlers.append(functools.partial(
            _dler, dl, url, tmp_dir_path, fne, ext, counter))
    return dlers
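# Illustrative sketch, not from the original sources: the downloader
# callables built above all share one counter, which fires its success
# callback only after every download has counted down and its failure
# callback as soon as any download cancels it.  The stand-in _Counter below
# only approximates the interface of cc.actor.counter.Counter as used in
# this module (a writable count attribute plus countdown() and cancel()).
class _Counter(object):

    def __init__(self, on_success, on_failure):
        self.count = 0
        self._on_success = on_success
        self._on_failure = on_failure
        self._cancelled = False

    def countdown(self):
        # One download finished; declare success when all have finished.
        if self._cancelled:
            return
        self.count -= 1
        if self.count == 0:
            self._on_success()

    def cancel(self):
        # One download failed; report failure once and ignore the rest.
        if not self._cancelled:
            self._cancelled = True
            self._on_failure()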
def _make_subprocess(url, file_name, cwd, prog):
    if prog == 'rtmpdump':
        cmd = ['rtmpdump', '--quiet', '--rtmp', url, '--flv', file_name,
               '--resume', '--skip', '1']
    else:
        cmd = ['ffmpeg', '-i', url, file_name]
    logging.debug('exec: CWD=%s %s', cwd, ' '.join(cmd))
    return psutil.Popen(cmd, cwd=cwd)
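# Hedged usage sketch, not part of the original sources: the caller is
# assumed to wait on the returned process handle and to treat a nonzero
# exit status as a failed download.  psutil.Popen mirrors the standard
# subprocess.Popen interface, so wait() and the exit code behave as in the
# standard library.
def _example_run_download(url, file_name, cwd):
    proc = _make_subprocess(url, file_name, cwd, 'ffmpeg')
    if proc.wait() != 0:
        raise RuntimeError('download failed: %s' % url)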
def video_blobs(self):
    if self._video_blobs is None:
        self._video_blobs = []
        for video in self.feed['result']['videos']:
            page_url = video['canonicalURL']
            logging.debug('video_blobs: page_url=%s', page_url)
            # Zero out time part of datetime object.
            date = datetime.datetime.fromtimestamp(int(video['airDate']))
            date = datetime.datetime(date.year, date.month, date.day)
            page_doc = cc.http.get_url(page_url)
            self._video_blobs.append(VideoBlob(
                uri=_get_uri(page_doc, video['id']),
                page_url=page_url,
                episode_url=_get_episode_url(page_doc),
                date=date))
    return self._video_blobs
def _get_url_dom_tree_with_fixes(url):
    '''Fix some known syntax errors in the returned xml documents.'''
    doc = cc.http.get_url_bytes(url)
    pieces = []
    # Strip comments; some contain a double hyphen, which is invalid XML.
    last_end = 0
    comment_end = 0
    while True:
        comment_start = doc.find(b'<!--', comment_end)
        if comment_start == -1:
            break
        comment_end = doc.index(b'-->', comment_start) + 3
        logging.debug('strip: %s', repr(doc[comment_start:comment_end]))
        pieces.append(doc[last_end:comment_start])
        last_end = comment_end
    if pieces:
        pieces.append(doc[last_end:])
        doc = b''.join(pieces)
    return lxml.etree.fromstring(doc)
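# Illustrative sketch with an assumed input, not from the original sources:
# XML forbids '--' inside comments, so lxml rejects such documents until the
# offending comment is stripped, which is what the helper above does before
# parsing.
def _example_double_hyphen_comment():
    import lxml.etree  # already imported by the module above
    bad = b'<feed><!-- aired 2010--2011 --><title>x</title></feed>'
    try:
        lxml.etree.fromstring(bad)
    except lxml.etree.XMLSyntaxError:
        pass  # the double hyphen inside the comment trips the parser
    good = bad.replace(b'<!-- aired 2010--2011 -->', b'')
    assert lxml.etree.fromstring(good).findtext('title') == 'x'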
def _starter(show_url, is_unstashing, start, end, step):
    logging.info('starter: show_url=%s', show_url)
    if is_unstashing:
        # Replay episodes that were stashed to the pickle file earlier.
        with cc.statics.pickle_lock:
            pickle_file = cc.statics.pickle_file
        try:
            while True:
                episode = pickle.load(pickle_file)
                cc.actor.downloader.downloader(episode)
        except EOFError:
            pass
        return
    feed = cc.feed.Feed.from_show_url(show_url)
    logging.debug(
        'feed...'
        '\n url=%s'
        '\n videos_url=%s'
        '\n start=%s'
        '\n end=%s',
        feed.url, feed.videos_url, feed.start, feed.end)
    for sub_feed in feed.replace_date_range(start, end, step):
        starter_helper(sub_feed)
def _downloader(episode, output_dir_path, simulate, salvage_dirs):
    logging.info('downloader: episode .date=%s .url=%s',
                 episode.date, episode.url)
    logging.trace('downloader: episode...'
                  '\n url=%s'
                  '\n dir_name=%s'
                  '\n date=%s'
                  '\n videos=\n%s',
                  episode.url, episode.dir_name, episode.date,
                  cc.pformat.PrettyFormatter(episode.videos))
    # Check/make paths.
    dir_path = os.path.join(output_dir_path, episode.dir_name)
    if os.path.exists(dir_path):
        logging.info('downloader: skip: dir_path=%s', dir_path)
        return
    salvage = cc.salvage.Salvage(episode.dir_name, salvage_dirs)
    if simulate:
        tmp_dir_path = os.path.join(
            output_dir_path, 'tmpXXXXXXXX-' + episode.dir_name)
    else:
        tmp_dir_path = tempfile.mkdtemp(
            suffix='-' + episode.dir_name, dir=output_dir_path)
    logging.debug('downloader: tmp_dir_path=%s', tmp_dir_path)
    # Construct actors.
    counter = cc.actor.counter.Counter(
        functools.partial(_downloader_success, episode.url, tmp_dir_path,
                          dir_path, simulate),
        functools.partial(_downloader_failed, episode.url))
    dlers = _make_dlers(episode, tmp_dir_path, counter, simulate, salvage)
    counter.count = len(dlers)
    # Start actors.
    for dler in dlers:
        dler()
def _downloader_success(episode_url, tmp_dir_path, dir_path, simulate):
    logging.debug('downloader: %s -> %s', tmp_dir_path, dir_path)
    if not simulate:
        os.rename(tmp_dir_path, dir_path)
    logging.info('downloader: success: episode.url=%s', episode_url)