Exemplo n.º 1
0
 def _serve_urls(self, urls):
     """Convert URL records into plain dictionaries for a response.

     Every entry carries the record's address, the link produced by
     ``url_for_url``, the archive timestamp in ISO format and one summary
     dictionary per variant attached to the record.
     """
     payload = []
     for record in urls:
         entry = {"url": record.url,
                  "view": url_for_url(record),
                  "archived_at": record.archived_at.isoformat()}

         # One small dictionary per variant, in the order the record lists them.
         variant_rows = []
         for item in record.variants:
             variant_rows.append({"variant": item.variant,
                                  "data": item.data,
                                  "is_ready": item.is_ready})
         entry["variants"] = variant_rows

         payload.append(entry)
     return payload
Exemplo n.º 2
0
    def post(self):
        """Archive the URL given in the request, or reuse a recent archive.

        Reads the required ``url`` request argument. If a record for the same
        URL exists whose ``archived_at`` is within ``MIN_INTERVAL`` of the
        current UTC time, its view link is returned and nothing new is
        created. Otherwise a new ``UrlModel`` is added to the session together
        with one "google-chrome" variant per configured resolution and one
        "wget" variant.

        Returns:
            A dict ``{"view": <link>}`` built with ``url_for_url``.

        Raises:
            Aborts with HTTP 400 when the URL has no scheme or no network
            location.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("url", help="URL to archive", type=unicode, required=True)
        args = parser.parse_args()

        # archived_at is stored as naive UTC (see below), so the cut-off for
        # "recently archived" has to be computed from UTC as well; comparing
        # against local time would shift the window by the server's UTC offset.
        # NOTE(review): MIN_INTERVAL is presumably a timedelta - confirm in config.
        now = datetime.utcnow()

        recent = db.session.query(UrlModel).\
                filter(
                    UrlModel.url == args["url"],
                    UrlModel.archived_at >= now - app.config["MIN_INTERVAL"]
                ).\
                order_by(UrlModel.archived_at.desc()).\
                first()
        if recent:
            return {"view": url_for_url(recent)}

        # Reject anything that is not an absolute URL.
        o = urlparse.urlsplit(urllib.unquote(args["url"].encode("utf-8")))
        if o.scheme == "" or o.netloc == "":
            abort(400)

        url = UrlModel()
        url.url = args["url"]

        url.scheme = o.scheme
        url.netloc = o.netloc
        url.path = o.path
        url.query = o.query

        url.archived_at = now.replace(microsecond=0)

        # The storage directory is derived from a SHA-1 of URL + timestamp and
        # nested under its first three two-character chunks: "ab/cd/ef/<digest>".
        digest = hashlib.sha1(url.url.encode("utf-8") + url.archived_at.isoformat()).hexdigest()
        chunks = [part for part in re.split("(.{2})", digest) if part]
        url.archive_path = "/".join(chunks[:3] + [digest])

        db.session.add(url)

        for width, height in app.config["RESOLUTIONS"]:
            self._create_variant(url, "google-chrome", {"width": width, "height": height}, google_chrome)

        self._create_variant(url, "wget", {}, wget)

        return {"view": url_for_url(url)}
Exemplo n.º 3
0
def view(archived_at, variant, scheme, netloc, tail):
    """Render an archived page with the archive header injected into it.

    Args:
        archived_at: Archive timestamp formatted with
            ``ARCHIVE_URL_DATETIME_FORMAT``, or ``"_"`` for the newest archive.
        variant: ``"_"`` to pick a variant automatically, a string such as
            ``"screenshot-1024x768"`` / ``"pdf-1024x768"`` /
            ``"html-1024x768"`` for a Google Chrome capture, or a plain
            variant name such as ``"wget"``.
        scheme: Scheme of the archived URL.
        netloc: Network location of the archived URL.
        tail: Unused here; the path is taken from ``PATH_INFO`` instead.

    Returns:
        A ``Response`` with the prettified HTML, or a redirect (to the PDF
        file, or to ``url_for_url(url)`` when the requested variant/view is
        unknown).

    Raises:
        Aborts with 404 when no archived URL matches or ``archived_at`` is not
        a valid timestamp, and with 503 when the selected variant is not ready
        or no variant is ready at all.
    """
    prefix = "/view/%s/%s/%s/%s" % (archived_at, variant, scheme, netloc)
    path = request.environ["PATH_INFO"][len(prefix):]

    # An empty path and "/" are treated as the same resource.
    if path.strip(b"/") != b"":
        path_filter = Url.path == path
    else:
        path_filter = (Url.path == b"/") | (Url.path == b"")

    q = db.session.query(Url).\
            outerjoin(UrlVariant).\
            filter(
                Url.scheme == scheme.encode("ascii"),
                Url.netloc == netloc.encode("ascii"),
                path_filter,
                Url.query == request.environ["QUERY_STRING"],
            )
    if archived_at == "_":
        url = q.order_by(Url.archived_at.desc()).first()
    else:
        # archived_at comes straight from the request path; a malformed value
        # means "no such archive", not a server error.
        try:
            archived_dt = datetime.strptime(archived_at, app.config["ARCHIVE_URL_DATETIME_FORMAT"])
        except ValueError:
            abort(404)
        url = q.filter(Url.archived_at == archived_dt).first()

    if url is None:
        abort(404)

    variant_view = None
    if variant == "_":
        # Automatic selection: widest ready Chrome capture that still fits the
        # client width reported in the "resolution" cookie ("<width>x<height>").
        try:
            client_width = int(request.cookies["resolution"].split("x")[0])
        except (KeyError, ValueError):
            client_width = app.config["RESOLUTIONS"][0][0]

        fitting = [v for v in url.ready_variants
                   if v.variant == "google-chrome" and v.data["width"] < client_width + 20]
        if fitting:
            # max() keeps the first of equally wide captures, like the
            # stable descending sort it replaces.
            variant = max(fitting, key=lambda v: v.data["width"])
            variant_view = "screenshot"
        else:
            # NOTE(review): if this fallback is a google-chrome variant,
            # variant_view stays None and the code below redirects to
            # url_for_url(url) - confirm that cannot loop back to this page.
            try:
                variant = url.ready_variants[0]
            except IndexError:
                abort(503)
    else:
        requested = variant
        m = re.match("(?P<view>[a-z]+)-(?P<width>[0-9]+)x(?P<height>[0-9]+)", requested)
        if m:
            width = int(m.group("width"))
            height = int(m.group("height"))
            for candidate in url.variants:
                if (candidate.variant == "google-chrome" and
                        candidate.data["width"] == width and
                        candidate.data["height"] == height):
                    if not candidate.is_ready:
                        abort(503)
                    variant = candidate
                    break
            else:
                # No Chrome capture with that resolution exists for this URL.
                return redirect(url_for_url(url))
            variant_view = m.group("view")
        else:
            named = [v for v in url.variants if v.variant == requested]
            if not named:
                return redirect(url_for_url(url))
            variant = named[0]

            if not variant.is_ready:
                abort(503)

    header = render_template("view/header.html", url=url, variant=variant,
                             variants=_view_header_variants(url, variant, variant_view))

    if variant.variant == "google-chrome":
        if variant_view == "screenshot":
            src = "%s/%s/screenshot.png" % (app.config["ARCHIVE_URL"],
                                            variant.data["archive_path"])
            # A zero-byte screenshot is passed to the template as "no image".
            if os.path.getsize(os.path.join(app.config["ARCHIVE_PATH"],
                                            variant.data["archive_path"],
                                            "screenshot.png")) == 0:
                src = None
            html = render_template("view/screenshot.html", url=url, src=src)
            base = None
        elif variant_view == "pdf":
            return redirect("%s/%s/page.pdf" % (app.config["ARCHIVE_URL"],
                                                variant.data["archive_path"]))
        elif variant_view == "html":
            with open(os.path.join(app.config["ARCHIVE_PATH"], variant.data["archive_path"],
                                   "index.html")) as f:
                html = f.read()
                base = "%s/%s/" % (app.config["ARCHIVE_URL"],
                                   variant.data["archive_path"])
        else:
            return redirect(url_for_url(url))
    elif variant.variant == "wget":
        with open(os.path.join(app.config["ARCHIVE_PATH"], variant.data["archive_path"],
                               variant.data["relpath"])) as f:
            html = f.read()
            base = "%s/%s/%s" % (app.config["ARCHIVE_URL"],
                                 variant.data["archive_path"],
                                 variant.data["relpath"])
    else:
        return redirect(url_for_url(url))

    # Inject the header stylesheet, an optional <base> so relative links
    # resolve against the archive, and the rendered header itself.
    soup = BeautifulSoup(html)
    soup.head.insert(0, soup.new_tag("link", href=url_for("static", filename="header.css", _external=True),
                                     rel="stylesheet"))
    if base:
        soup.head.insert(0, soup.new_tag("base", href=base))
    soup.body.insert(0, BeautifulSoup(header))
    return Response(soup.prettify())


def _view_header_variants(url, variant, variant_view):
    """Build the variant menu passed to the header template as ``variants``."""
    chrome_variants = sorted((v for v in url.ready_variants if v.variant == "google-chrome"),
                             key=lambda v: v.data["width"])
    menu = [{"title": "Google Chrome %dx%d" % (v.data["width"], v.data["height"]),
             "is_current": variant == v,
             "views": _view_chrome_views(url, v, variant, variant_view)}
            for v in chrome_variants]
    if any(v.variant == "wget" for v in url.ready_variants):
        menu.append({"title": "wget HTML",
                     "url": url_for_url(url, "wget"),
                     "is_current": variant.variant == "wget"})
    return menu


def _view_chrome_views(url, v, variant, variant_view):
    """List the screenshot / PDF / HTML links for one Chrome variant ``v``."""
    size = (v.data["width"], v.data["height"])
    views = [{"title": "Screenshot",
              "url": url_for_url(url, "screenshot-%dx%d" % size),
              "is_current": v == variant and variant_view == "screenshot"},
             {"title": "PDF",
              "url": url_for_url(url, "pdf-%dx%d" % size),
              "is_current": v == variant and variant_view == "pdf"}]
    # Captures without a "features" list are assumed to include HTML.
    if "html" in v.data.get("features", ["html"]):
        views.append({"title": "HTML",
                      "url": url_for_url(url, "html-%dx%d" % size),
                      "is_current": v == variant and variant_view == "html"})
    return views