Exemplo n.º 1
0
def insert(dry_run=False, impatient=False):
    """Deploy the songsearch-autocomplete bundle and inject its snippets.

    Steps:
      1. Extract songsearch-autocomplete.zip into a temp dir and, if its
         content differs from what is deployed, swap it into place.
      2. Build the JS <script> block and the inline-CSS block from the
         JS_BLOCK/CSS_BLOCK templates and the unpacked assets.
      3. Splice both blocks into the cached index.html between the
         songsearch-autocomplete comment markers (or, when the markers are
         absent, insert before </body> and </head> respectively).
      4. Rewrite the file and refresh its .gz (zopfli) and .br (brotli)
         companions when the content changed.

    :param dry_run: if True, do all the work but never write the file.
    :param impatient: if True, skip the patient wait for the template file.
    :raises SongsearchAutocompleteError: on inconsistent markers, a wrong
        number of JS chunk references, a wrong number of CSS license
        strings, or stale compressed files.
    """

    # Unzip and zopfli if the content has changed.
    autocompleteroot = os.path.join(settings.BASE_DIR,
                                    "songsearch-autocomplete")
    contentroot = os.path.join(settings.BASE_DIR, "peterbecom-static-content")
    assert os.path.isdir(autocompleteroot)
    zip_path = os.path.join(autocompleteroot, "songsearch-autocomplete.zip")
    assert os.path.isfile(zip_path)
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(zip_path, "rb") as f:
            zf = zipfile.ZipFile(f)
            zf.extractall(tmpdir)
        assert os.listdir(tmpdir)
        source = os.path.join(tmpdir, "songsearch-autocomplete")
        assert os.path.isdir(source), source
        destination = os.path.join(contentroot, "songsearch-autocomplete")
        # Only swap the deployed directory when the new content differs.
        different = not _are_dir_trees_equal(source, destination)
        if different:
            shutil.rmtree(destination)
            shutil.move(source, destination)
            print("MOVED", source, "TO", destination)

    assert os.path.isdir(contentroot)
    # Exactly one CSS bundle is expected; the unpack is broken otherwise.
    csspath, = glob(
        os.path.join(contentroot, "songsearch-autocomplete/css/*.css"))
    jspaths = glob(os.path.join(contentroot,
                                "songsearch-autocomplete/js/*.js"))
    # Make the JS paths relative to the content root for use in URLs.
    jspaths = [x.replace(contentroot + "/", "") for x in jspaths]

    with open(csspath) as f:
        csspayload = f.read()
    # The source-map comment is useless once the CSS is inlined; strip it.
    csspayload = re.sub(r"\/\*# sourceMappingURL=.*?\*\/", "", csspayload)
    csspayload = csspayload.strip()

    js_block = "\n".join(
        JS_BLOCK.replace("{cdn}", CDN).replace("{jspath}", jspath).strip()
        for jspath in jspaths
    )
    css_block = CSS_BLOCK.replace("{cdn}", CDN).replace(
        "{csspayload}", csspayload
    ).strip()

    template = os.path.join(contentroot,
                            "_FSCACHE/plog/blogitem-040601-1/index.html")
    if not impatient:
        patient_isfile_check(template)
    assert os.path.isfile(template), template
    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    if not has_been_css_minified(content):
        print("WARNING! The HTML file hasn't been CSS minified yet.")
        return

    # Inject the JS code
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        # Markers already present: replace whatever sits between them.
        content = (
            content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
        )
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header")
        content = content.replace(
            "</body>",
            "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer))

    # Inject the CSS code
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = (
            content[:start] + css_header + "\n" + css_block + "\n" +
            content[end:]
        )
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header")
        content = content.replace(
            "</head>",
            "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer))

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content)
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files))

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)))

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Stale compressed copies must never outlive a content change.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # BUGFIX: this used to remove the .gz file (again) instead
                # of the stale .br file it just checked for.
                os.remove(template + ".br")
            _brotli(template)
        print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)

    # The zopfli file should always be younger than the not-zopflied file.
    age_html = os.stat(template).st_mtime
    if os.path.isfile(template + ".gz"):
        age_gz = os.stat(template + ".gz").st_mtime
        if age_html > age_gz:
            os.remove(template + ".gz")
            raise SongsearchAutocompleteError(
                "The index.html.gz file was older than the index.html file")
    if os.path.isfile(template + ".br"):
        age_br = os.stat(template + ".br").st_mtime
        if age_html > age_br:
            os.remove(template + ".br")
            raise SongsearchAutocompleteError(
                "The index.html.br file was older than the index.html file")
def _post_process_template(template,
                           impatient,
                           js_block,
                           css_block,
                           dry_run=False):
    """Inject the songsearch-autocomplete JS/CSS blocks into one cached
    HTML template and keep its .gz/.br companions in sync.

    :param template: path-like object (has ``.is_file()``) to the cached
        index.html; converted to ``str`` internally.
    :param impatient: if True, don't patiently wait for the file to exist.
    :param js_block: HTML snippet with the <script> tags to inject.
    :param css_block: HTML snippet with the inline CSS to inject.
    :param dry_run: if True, report what would change but write nothing.
    :raises CSSMinifiedCheckError: when the minified-CSS check raises
        ValueError for this template.
    :raises SongsearchAutocompleteError: on inconsistent injection markers,
        a wrong number of JS chunk references, or a wrong number of CSS
        license strings.
    """
    if not template.is_file():
        print("WARNING! {} does not exist".format(template))
        return
    assert template.is_file(), template
    # more convenient this way. Also, mostly due to Python 3.5 and legacy
    template = str(template)
    if not impatient:
        patient_isfile_check(template)

    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    try:
        if not has_been_css_minified(content):
            print("WARNING! The HTML file hasn't been CSS minified yet.")
            return
    except ValueError:
        raise CSSMinifiedCheckError(
            "Template with problem: {}".format(template))

    # Inject the JS code
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        # Markers already present: replace whatever sits between them.
        content = (
            content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
        )
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header")
        content = content.replace(
            "</body>",
            "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer))

    # Inject the CSS code
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = (
            content[:start] + css_header + "\n" + css_block + "\n" +
            content[end:]
        )
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header")
        content = content.replace(
            "</head>",
            "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer))

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content)
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files))

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)))

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Stale compressed copies must never outlive a content change.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # BUGFIX: this used to remove the .gz file (again) instead
                # of the stale .br file it just checked for.
                os.remove(template + ".br")
            _brotli(template)
        print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)
Exemplo n.º 3
0
def _post_process_cached_html(filepath, url, postprocessing, original_url):
    """Optimize a freshly cached HTML file in place.

    Runs mincss over the file (retrying up to 3 times on timeout/None
    results), then either re-runs the songsearch autocomplete insertion
    (for the /plog/blogitem-040601-1 pages) or minifies and recompresses
    the HTML, and finally queues the URL for CDN purging.

    :param filepath: path to the cached HTML file to rewrite.
    :param url: the public URL the file was cached for.
    :param postprocessing: object whose ``notes`` list collects progress
        messages (only ``.notes.append`` is used here).
    :param original_url: optional URL to feed mincss instead of ``url``.
    :raises ValueError: if ``url`` contains a linebreak.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        postprocessing.notes.append("{} no longer exists".format(filepath))
        return

    attempts = 0
    with open(filepath) as f:
        html = f.read()

    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return

    # Squeezing every little byte out of it!
    # That page doesn't need the little minimalcss stats block.
    # Otherwise, the default is to include it.
    include_minimalcss_stats = "/plog/blogitem-040601-1" not in url

    optimized_html = html
    # The /awspa pages skip the mincss step entirely.
    # (Was `while True and not ...`; the `True and` was redundant.)
    while not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            print("CALLING mincss_html FOR", original_url or url)
            optimized_html = mincss_html(
                html,
                original_url or url,
                include_minimalcss_stats=include_minimalcss_stats,
            )
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0))
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)))
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception))
            optimized_html = None

        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(
                    filepath, url))
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append(
                "Gave up after {} attempts".format(attempts))
            return

        # Keep the pre-optimization file around as a .original backup.
        try:
            shutil.move(filepath, filepath + ".original")
        except FileNotFoundError:
            postprocessing.notes.append(
                "Can't move to .original {} no longer exists".format(filepath))
            return
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break

    # Pagination suffix like /p2 selects the page number; default to 1.
    try:
        page, = re.findall(r"/p(\d+)$", url)
        page = int(page)
    except ValueError:
        page = 1

    if "/plog/blogitem-040601-1" in url:
        songsearch_autocomplete.insert(page=page)
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append(
            "Took {:.1f}s to minify HTML".format(t1 - t0))

        # Prefer the minified HTML; fall back to the mincss-optimized copy.
        # (Was the dated `a and a or b` idiom; `a or b` is equivalent.)
        t0 = time.perf_counter()
        _zopfli_html(minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append(
            "Took {:.1f}s to Zopfli HTML".format(t1 - t0))

        t0 = time.perf_counter()
        _brotli_html(minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append(
            "Took {:.1f}s to Brotli HTML".format(t1 - t0))

    CDNPurgeURL.add(url)
def _post_process_template(template, impatient, js_block, css_block, dry_run=False):
    """Inject the songsearch-autocomplete JS/CSS blocks into one cached
    HTML template and keep its .gz/.br companions in sync.

    :param template: path-like object (has ``.is_file()``) to the cached
        index.html; converted to ``str`` internally.
    :param impatient: if True, don't patiently wait for the file to exist.
    :param js_block: HTML snippet with the <script> tags to inject.
    :param css_block: HTML snippet with the inline CSS to inject.
    :param dry_run: if True, report what would change but write nothing.
    :raises CSSMinifiedCheckError: when the minified-CSS check raises
        ValueError for this template.
    :raises SongsearchAutocompleteError: on inconsistent injection markers,
        a wrong number of JS chunk references, or a wrong number of CSS
        license strings.
    """
    if not template.is_file():
        print("WARNING! {} does not exist".format(template))
        return
    assert template.is_file(), template
    # more convenient this way. Also, mostly due to Python 3.5 and legacy
    template = str(template)
    if not impatient:
        patient_isfile_check(template)

    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    try:
        if not has_been_css_minified(content):
            print("WARNING! The HTML file hasn't been CSS minified yet.")
            return
    except ValueError:
        raise CSSMinifiedCheckError("Template with problem: {}".format(template))

    # Inject the JS code
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        # Markers already present: replace whatever sits between them.
        content = content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</body>", "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer)
        )

    # Inject the CSS code
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = content[:start] + css_header + "\n" + css_block + "\n" + content[end:]
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</head>", "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer)
        )

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content
    )
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files)
        )

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)
            )
        )

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Stale compressed copies must never outlive a content change.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # BUGFIX: this used to remove the .gz file (again) instead
                # of the stale .br file it just checked for.
                os.remove(template + ".br")
            _brotli(template)
        print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)
Exemplo n.º 5
0
def _post_process_cached_html(filepath, url, postprocessing, original_url):
    """Optimize a freshly cached HTML file in place.

    Runs mincss over the file (retrying up to 3 times on timeout/None
    results), then either re-runs the songsearch autocomplete insertion
    (for the /plog/blogitem-040601-1 pages) or minifies and recompresses
    the HTML, and finally queues the URL for CDN purging.

    :param filepath: path to the cached HTML file to rewrite.
    :param url: the public URL the file was cached for.
    :param postprocessing: object whose ``notes`` list collects progress
        messages (only ``.notes.append`` is used here).
    :param original_url: optional URL to feed mincss instead of ``url``.
    :raises ValueError: if ``url`` contains a linebreak.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        postprocessing.notes.append("{} no longer exists".format(filepath))
        return

    attempts = 0
    with open(filepath) as f:
        html = f.read()

    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return

    # Squeezing every little byte out of it!
    # That page doesn't need the little minimalcss stats block.
    # Otherwise, the default is to include it.
    include_minimalcss_stats = "/plog/blogitem-040601-1" not in url

    optimized_html = html
    # The /awspa pages skip the mincss step entirely.
    # (Was `while True and not ...`; the `True and` was redundant.)
    while not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            print("CALLING mincss_html FOR", original_url or url)
            optimized_html = mincss_html(
                html,
                original_url or url,
                include_minimalcss_stats=include_minimalcss_stats,
            )
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0)
                )
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)
                    )
                )
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception)
            )
            optimized_html = None

        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(filepath, url)
            )
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append("Gave up after {} attempts".format(attempts))
            return

        # Keep the pre-optimization file around as a .original backup.
        try:
            shutil.move(filepath, filepath + ".original")
        except FileNotFoundError:
            postprocessing.notes.append(
                "Can't move to .original {} no longer exists".format(filepath)
            )
            return
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break

    # Pagination suffix like /p2 selects the page number; default to 1.
    try:
        page, = re.findall(r"/p(\d+)$", url)
        page = int(page)
    except ValueError:
        page = 1

    if "/plog/blogitem-040601-1" in url:
        songsearch_autocomplete.insert(page=page)
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append("Took {:.1f}s to minify HTML".format(t1 - t0))

        # Prefer the minified HTML; fall back to the mincss-optimized copy.
        # (Was the dated `a and a or b` idiom; `a or b` is equivalent.)
        t0 = time.perf_counter()
        _zopfli_html(minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Zopfli HTML".format(t1 - t0))

        t0 = time.perf_counter()
        _brotli_html(minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Brotli HTML".format(t1 - t0))

    purge_cdn_urls([url])
Exemplo n.º 6
0
def _post_process_cached_html(filepath, url, postprocessing):
    """Optimize a cached HTML file in place: run mincss over it, then
    either re-run the songsearch autocomplete insertion or minify and
    recompress the HTML.

    NOTE(review): older 3-argument variant of the function above (no
    ``original_url``, no CDN purge step visible) — confirm which copy is
    actually wired up.

    :param filepath: path to the cached HTML file to rewrite.
    :param url: the public URL the file was cached for.
    :param postprocessing: object whose ``notes`` list collects progress
        messages (only ``.notes.append`` is used here).
    :raises ValueError: if ``url`` has a linebreak or ``filepath`` is gone.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        raise ValueError(
            "{!r} does not exist and can't be post-processed".format(filepath))

    attempts = 0
    with open(filepath) as f:
        html = f.read()

    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return

    # Retry mincss up to 3 times; /awspa pages skip the loop entirely.
    optimized_html = html
    while True and not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            optimized_html = mincss_html(html, url)
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0))
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)))
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception))
            optimized_html = None
            # created = False

        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(
                    filepath, url))
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append(
                "Gave up after {} attempts".format(attempts))
            return

        # Keep the pre-optimization file around as a .original backup.
        shutil.move(filepath, filepath + ".original")
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break

    if url.endswith("/plog/blogitem-040601-1"):
        songsearch_autocomplete.insert()
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append("Took {:.1f}s to minify HTML".format(t1 -
                                                                         t0))

        # Compress whichever HTML we ended up with (minified if it worked,
        # otherwise the mincss-optimized copy).
        t0 = time.perf_counter()
        _zopfli_html(minified_html and minified_html or optimized_html,
                     filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Zopfli HTML".format(t1 -
                                                                         t0))

        t0 = time.perf_counter()
        _brotli_html(minified_html and minified_html or optimized_html,
                     filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Brotli HTML".format(t1 -
                                                                         t0))