def _try_youtube_dl(self, ydl, site, page):
    """
    Run youtube-dl on ``page.url`` and optionally archive what it extracted.

    When ``self._proxy(site)`` is truthy and
    ``self._enable_warcprox_features(site)`` is truthy, the info returned by
    ``ydl.extract_info`` is serialized as sorted, indented JSON and handed to
    ``self._warcprox_write_record`` as a "metadata" record whose url is
    ``"youtube-dl:" + brozzler.fixup(page.url)``.

    Args:
        ydl: object providing ``extract_info(url)``
        site: object providing ``extra_headers()``; also passed to
            ``self._proxy`` and ``self._enable_warcprox_features``
        page: object providing a ``url`` attribute

    Returns:
        None

    Raises:
        brozzler.ShutdownRequested: always propagated untouched.
        brozzler.ReachedLimit: when the failure wraps a
            ``urllib.error.HTTPError`` whose ``code`` is 420.
        BaseException: any other failure is re-raised unchanged, except
            failures wrapping ``youtube_dl.utils.UnsupportedError``, which
            are silently ignored.
    """
    try:
        self.logger.info("trying youtube-dl on {}".format(page))
        info = ydl.extract_info(page.url)
        if self._proxy(site) and self._enable_warcprox_features(site):
            info_json = json.dumps(info, sort_keys=True, indent=4)
            self.logger.info(
                    "sending WARCPROX_WRITE_RECORD request to warcprox "
                    "with youtube-dl json for %s", page)
            self._warcprox_write_record(
                    warcprox_address=self._proxy(site),
                    url="youtube-dl:%s" % brozzler.fixup(page.url),
                    warc_type="metadata",
                    content_type="application/vnd.youtube-dl_formats+json;charset=utf-8",
                    payload=info_json.encode("utf-8"),
                    extra_headers=site.extra_headers())
    except brozzler.ShutdownRequested:
        raise
    except BaseException as e:
        # The wrapped failure, when there is one, is carried on the exception
        # as an ``exc_info`` sequence (type, value, ...). The attribute can be
        # absent, None or empty; indexing it in that state would raise a new
        # TypeError/IndexError from inside this handler and hide the real
        # error, so re-raise the original exception instead.
        exc_info = getattr(e, "exc_info", None)
        if not exc_info:
            raise

        wrapped_type = exc_info[0]
        if wrapped_type == youtube_dl.utils.UnsupportedError:
            # youtube-dl does not support this url; nothing to archive.
            pass
        elif (wrapped_type == urllib.error.HTTPError
                and hasattr(exc_info[1], "code")
                and exc_info[1].code == 420):
            # NOTE(review): 420 is presumably the proxy signalling that a
            # crawl limit was hit -- confirm against the warcprox side.
            raise brozzler.ReachedLimit(exc_info[1])
        else:
            raise
def _on_screenshot(screenshot_png):
    """
    Handle a freshly captured PNG screenshot.

    If the enclosing scope's ``on_screenshot`` is truthy, the PNG is passed
    to it and nothing else happens. Otherwise, when both ``self._proxy(site)``
    and ``self._enable_warcprox_features(site)`` are truthy, the PNG is
    converted by ``self.full_and_thumb_jpegs`` into a (full, thumbnail) pair
    and each one is written through ``self._warcprox_write_record`` as an
    "image/jpeg" resource record, under "screenshot:<url>" and
    "thumbnail:<url>" respectively.
    """
    # A caller-supplied handler takes precedence over archiving.
    if on_screenshot:
        on_screenshot(screenshot_png)
        return

    archiving_enabled = (
            self._proxy(site) and self._enable_warcprox_features(site))
    if not archiving_enabled:
        return

    self.logger.info("sending WARCPROX_WRITE_RECORD request "
            "to warcprox with screenshot for %s", page)
    full_jpeg, thumb_jpeg = self.full_and_thumb_jpegs(screenshot_png)

    # Full-size image first, then the thumbnail. Proxy address, fixed-up url
    # and extra headers are looked up again for every record.
    records = (("screenshot", full_jpeg), ("thumbnail", thumb_jpeg))
    for scheme, jpeg_bytes in records:
        self._warcprox_write_record(
                warcprox_address=self._proxy(site),
                url=scheme + ":%s" % brozzler.fixup(page.url),
                warc_type="resource",
                content_type="image/jpeg",
                payload=jpeg_bytes,
                extra_headers=site.extra_headers())