def _serve_urls(self, urls):
    """Convert URL records into a list of plain dictionaries.

    Each record in ``urls`` becomes a dict with the keys ``"url"``,
    ``"view"`` (the result of ``url_for_url`` for that record),
    ``"archived_at"`` (ISO-8601 string) and ``"variants"`` (one dict per
    variant holding ``"variant"``, ``"data"`` and ``"is_ready"``).

    :param urls: iterable of URL records exposing ``url``, ``archived_at``
        and ``variants`` attributes.
    :return: list of dicts, in the same order as ``urls``.
    """
    served = []
    for record in urls:
        entry = {
            "url": record.url,
            "view": url_for_url(record),
            "archived_at": record.archived_at.isoformat(),
        }

        variant_entries = []
        for item in record.variants:
            variant_entries.append({
                "variant": item.variant,
                "data": item.data,
                "is_ready": item.is_ready,
            })
        entry["variants"] = variant_entries

        served.append(entry)
    return served
def post(self):
    """Archive the URL given in the request, or reuse a recent archive.

    Reads the required ``url`` request argument. If the same URL was
    archived within ``app.config["MIN_INTERVAL"]`` the most recent such
    record is reused; otherwise a new record is created and one
    ``google-chrome`` variant per entry of ``app.config["RESOLUTIONS"]``
    plus one ``wget`` variant are created through ``self._create_variant``.

    :return: dict with a single ``"view"`` key holding ``url_for_url`` of
        the reused or newly created record.
    :raises: whatever ``abort(400)`` raises when the URL has no scheme or
        no network location.
    """
    parser = reqparse.RequestParser()
    parser.add_argument("url", help="URL to archive", type=unicode, required=True)
    args = parser.parse_args()

    # ``archived_at`` is stored with ``datetime.utcnow()`` (see below), so the
    # cutoff must be computed in UTC as well. Using local time here shifted
    # the dedupe window by the server's UTC offset.
    cutoff = datetime.utcnow() - app.config["MIN_INTERVAL"]
    recent = (
        db.session.query(UrlModel)
        .filter(
            UrlModel.url == args["url"],
            UrlModel.archived_at >= cutoff,
        )
        .order_by(UrlModel.archived_at.desc())
        .first()
    )
    if recent:
        return {"view": url_for_url(recent)}

    o = urlparse.urlsplit(urllib.unquote(args["url"].encode("utf-8")))
    if o.scheme == "" or o.netloc == "":
        abort(400)

    url = UrlModel()
    url.url = args["url"]
    url.scheme = o.scheme
    url.netloc = o.netloc
    url.path = o.path
    url.query = o.query
    url.archived_at = datetime.utcnow().replace(microsecond=0)

    # The archive directory is "<aa>/<bb>/<cc>/<full sha1>", where aa, bb, cc
    # are the first three two-character chunks of the hex digest.
    digest = hashlib.sha1(url.url.encode("utf-8") + url.archived_at.isoformat()).hexdigest()
    shards = [digest[i:i + 2] for i in range(0, 6, 2)]
    url.archive_path = "/".join(shards + [digest])
    db.session.add(url)

    for width, height in app.config["RESOLUTIONS"]:
        self._create_variant(url, "google-chrome", {"width": width, "height": height}, google_chrome)
    self._create_variant(url, "wget", {}, wget)

    return {"view": url_for_url(url)}
def view(archived_at, variant, scheme, netloc, tail):
    """Serve an archived copy of a page with the navigation header injected.

    :param archived_at: ``"_"`` to pick the most recent archive, otherwise a
        timestamp formatted as ``app.config["ARCHIVE_URL_DATETIME_FORMAT"]``.
    :param variant: ``"_"`` to pick a variant automatically (using the
        ``resolution`` cookie), ``"<view>-<width>x<height>"`` to pick a
        ``google-chrome`` capture and one of its views (``screenshot``,
        ``pdf``, ``html``), or a plain variant name such as ``"wget"``.
    :param scheme: scheme of the archived URL.
    :param netloc: network location of the archived URL.
    :param tail: not used by this function; the page path is re-derived
        from ``PATH_INFO`` instead.
    :return: a ``Response`` with the prettified HTML, or a redirect (to the
        PDF file, or to ``url_for_url(url)`` when the requested variant or
        view is unknown).
    :raises: whatever ``abort`` raises - 404 when no matching archive exists
        or ``archived_at`` is malformed, 503 when the selected variant is
        not ready yet.
    """
    # Everything after the fixed route prefix is the archived page's path.
    prefix = "/view/%s/%s/%s/%s" % (archived_at, variant, scheme, netloc)
    path = request.environ["PATH_INFO"][len(prefix):]

    if path.strip(b"/") != b"":
        path_filter = Url.path == path
    else:
        # An empty path and a bare "/" are treated as the same page.
        path_filter = (Url.path == b"/") | (Url.path == b"")

    q = (
        db.session.query(Url)
        .outerjoin(UrlVariant)
        .filter(
            Url.scheme == scheme.encode("ascii"),
            Url.netloc == netloc.encode("ascii"),
            path_filter,
            Url.query == request.environ["QUERY_STRING"],
        )
    )
    if archived_at == "_":
        url = q.order_by(Url.archived_at.desc()).first()
    else:
        try:
            archived_at_value = datetime.strptime(
                archived_at, app.config["ARCHIVE_URL_DATETIME_FORMAT"])
        except ValueError:
            # ``archived_at`` comes straight from the request URL; a value
            # that does not parse cannot match any archive.
            abort(404)
        url = q.filter(Url.archived_at == archived_at_value).first()
    if url is None:
        abort(404)

    variant_view = None
    if variant == "_":
        try:
            client_width = int(request.cookies["resolution"].split("x")[0])
        except (KeyError, ValueError):
            client_width = app.config["RESOLUTIONS"][0][0]

        # Widest ready google-chrome capture that still fits the client
        # (with 20 px of slack); ties keep the first one in list order.
        fitting = [
            v for v in url.ready_variants
            if v.variant == "google-chrome" and v.data["width"] < client_width + 20
        ]
        if fitting:
            variant = max(fitting, key=lambda v: v.data["width"])
            variant_view = "screenshot"
        else:
            # NOTE(review): if this fallback picks a google-chrome variant,
            # variant_view stays None and the code below redirects to
            # url_for_url(url) - confirm that cannot loop back here.
            try:
                variant = url.ready_variants[0]
            except IndexError:
                abort(503)
    else:
        m = re.match("(?P<view>[a-z]+)-(?P<width>[0-9]+)x(?P<height>[0-9]+)", variant)
        if m:
            wanted_width = int(m.group("width"))
            wanted_height = int(m.group("height"))
            for candidate in url.variants:
                if (candidate.variant == "google-chrome" and
                        candidate.data["width"] == wanted_width and
                        candidate.data["height"] == wanted_height):
                    if not candidate.is_ready:
                        abort(503)
                    variant = candidate
                    break
            else:
                # No capture with the requested resolution.
                return redirect(url_for_url(url))

            variant_view = m.group("view")
        else:
            named = [v for v in url.variants if v.variant == variant]
            if not named:
                return redirect(url_for_url(url))
            variant = named[0]

            if not variant.is_ready:
                abort(503)

    header = render_template("view/header.html", url=url, variant=variant,
                             variants=_view_variant_menu(url, variant, variant_view))

    if variant.variant == "google-chrome":
        if variant_view == "screenshot":
            src = "%s/%s/screenshot.png" % (app.config["ARCHIVE_URL"], variant.data["archive_path"])
            screenshot_file = os.path.join(app.config["ARCHIVE_PATH"],
                                           variant.data["archive_path"],
                                           "screenshot.png")
            # A zero-byte screenshot is passed to the template as "no image".
            if os.path.getsize(screenshot_file) == 0:
                src = None

            html = render_template("view/screenshot.html", url=url, src=src)
            base = None
        elif variant_view == "pdf":
            return redirect("%s/%s/page.pdf" % (app.config["ARCHIVE_URL"], variant.data["archive_path"]))
        elif variant_view == "html":
            with open(os.path.join(app.config["ARCHIVE_PATH"], variant.data["archive_path"], "index.html")) as f:
                html = f.read()
            base = "%s/%s/" % (app.config["ARCHIVE_URL"], variant.data["archive_path"])
        else:
            return redirect(url_for_url(url))
    elif variant.variant == "wget":
        with open(os.path.join(app.config["ARCHIVE_PATH"], variant.data["archive_path"], variant.data["relpath"])) as f:
            html = f.read()
        base = "%s/%s/%s" % (app.config["ARCHIVE_URL"], variant.data["archive_path"], variant.data["relpath"])
    else:
        return redirect(url_for_url(url))

    soup = BeautifulSoup(html)
    soup.head.insert(0, soup.new_tag("link", href=url_for("static", filename="header.css", _external=True), rel="stylesheet"))
    if base:
        # Inserted last so that <base> ends up first in <head>.
        soup.head.insert(0, soup.new_tag("base", href=base))
    soup.body.insert(0, BeautifulSoup(header))
    return Response(soup.prettify())


def _view_variant_menu(url, variant, variant_view):
    """Build the ``variants`` list handed to the ``view/header.html`` template.

    One entry per ready ``google-chrome`` variant (ordered by width), each
    with its available views, followed by a single ``wget HTML`` entry when
    any ready ``wget`` variant exists.

    :param url: the URL record whose ``ready_variants`` are listed.
    :param variant: the currently displayed variant.
    :param variant_view: name of the currently displayed view, or ``None``.
    :return: list of dicts describing the menu entries.
    """
    menu = []

    chrome_variants = sorted(
        [v for v in url.ready_variants if v.variant == "google-chrome"],
        key=lambda v: v.data["width"])
    for v in chrome_variants:
        size = "%dx%d" % (v.data["width"], v.data["height"])

        view_names = [("Screenshot", "screenshot"), ("PDF", "pdf")]
        # Variants that do not declare "features" offer the HTML view.
        if "html" in v.data.get("features", ["html"]):
            view_names.append(("HTML", "html"))

        menu.append({
            "title": "Google Chrome %s" % size,
            "is_current": variant == v,
            "views": [
                {
                    "title": title,
                    "url": url_for_url(url, "%s-%s" % (name, size)),
                    "is_current": v == variant and variant_view == name,
                }
                for title, name in view_names
            ],
        })

    if any(v.variant == "wget" for v in url.ready_variants):
        menu.append({
            "title": "wget HTML",
            "url": url_for_url(url, "wget"),
            "is_current": variant.variant == "wget",
        })

    return menu