Exemplo n.º 1
0
 def test_respect_link_order(self):
     """The hrefs collected from three-links.html are two.css then three.css."""
     page_url = 'file://' + os.path.join(HERE, 'three-links.html')
     processor = Processor()
     processor.process(page_url)
     found = [link.href for link in processor.links]
     eq_(found, ['two.css', 'three.css'])
Exemplo n.º 2
0
 def test_respect_link_order(self):
     """Processing three-links.html yields the hrefs two.css, three.css in that order."""
     page_url = "file://" + os.path.join(HERE, "three-links.html")
     processor = Processor()
     processor.process(page_url)
     found = [link.href for link in processor.links]
     eq_(found, ["two.css", "three.css"])
Exemplo n.º 3
0
    def test_pseudo_selectors_hell(self):
        """Check which selectors and at-rules remain in three.html's first stylesheet."""
        page_url = "file://" + os.path.join(HERE, "three.html")
        processor = Processor(preserve_remote_urls=False)
        processor.process(page_url)
        # Only the first linked stylesheet is inspected.
        first_link = processor.links[0]
        reduced = first_link.after

        # (snippet, must it still be present?) -- asserted in this order.
        expectations = [
            ("a.three:hover", True),
            ("a.hundred:link", False),
            (".container > a.one", True),
            (".container > a.notused", False),
            ('input[type="button"]', False),
            ('input[type="search"]::-webkit-search-decoration', True),
            ('input[type="reset"]::-webkit-search-decoration', False),
            ("@media (max-width: 900px)", True),
            (".container .two", True),
            ("a.four", False),
            ("::-webkit-input-placeholder", True),
            (":-moz-placeholder {", True),
            ("div::-moz-focus-inner", True),
            ("button::-moz-focus-inner", False),
            ("@-webkit-keyframes progress-bar-stripes", True),
            ("from {", True),
            # some day perhaps this can be untangled and parsed too
            ("@import url(other.css)", True),
        ]
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 4
0
    def test_pseudo_selectors_hell(self):
        """Check which selectors and at-rules remain in three.html's first stylesheet.

        Uses assertIn/assertNotIn so a failure reports the missing (or
        unexpected) snippet together with the CSS that was searched.
        """
        html = os.path.join(HERE, 'three.html')
        url = 'file://' + html
        p = Processor(preserve_remote_urls=False)
        p.process(url)
        # Only the first linked stylesheet of three.html is inspected.
        link = p.links[0]
        after = link.after
        self.assertIn('a.three:hover', after)
        self.assertNotIn('a.hundred:link', after)

        self.assertIn('.container > a.one', after)
        self.assertNotIn('.container > a.notused', after)
        self.assertNotIn('input[type="button"]', after)

        self.assertIn('input[type="search"]::-webkit-search-decoration', after)
        self.assertNotIn('input[type="reset"]::-webkit-search-decoration', after)

        self.assertIn('@media (max-width: 900px)', after)
        self.assertIn('.container .two', after)
        self.assertNotIn('a.four', after)

        self.assertIn('::-webkit-input-placeholder', after)
        self.assertIn(':-moz-placeholder {', after)
        self.assertIn('div::-moz-focus-inner', after)
        self.assertNotIn('button::-moz-focus-inner', after)

        self.assertIn('@-webkit-keyframes progress-bar-stripes', after)
        self.assertIn('from {', after)

        # some day perhaps this can be untangled and parsed too
        self.assertIn('@import url(other.css)', after)
Exemplo n.º 5
0
 def test_no_mincss_link(self):
     """The first link of no-mincss-link.html has identical before and after CSS."""
     page_url = 'file://' + os.path.join(HERE, 'no-mincss-link.html')
     processor = Processor()
     processor.process(page_url)
     first = processor.links[0]
     eq_(first.before, first.after)
Exemplo n.º 6
0
 def test_no_mincss_link(self):
     """For no-mincss-link.html the first link's CSS must come out unchanged."""
     page_url = 'file://' + os.path.join(HERE, 'no-mincss-link.html')
     processor = Processor()
     processor.process(page_url)
     first = processor.links[0]
     eq_(first.before, first.after)
Exemplo n.º 7
0
def run():
    """Process URL and print every inline and linked stylesheet, before and after."""
    processor = Processor()
    processor.process(URL)
    divider = '- ' * 40

    def show(location, block):
        # One report entry: where the CSS came from, then both versions.
        print(location)
        print(divider)
        print("BEFORE")
        print(block.before)
        print(divider)
        print("AFTER:")
        print(block.after)

    # Inline CSS: the code before and after simplification.
    print("INLINES ".ljust(79, '-'))
    for block in processor.inlines:
        show("On line %s" % block.line, block)

    # CSS referenced through links: the code before and after simplification.
    print("LINKS ".ljust(79, '-'))
    for block in processor.links:
        show("On href %s" % block.href, block)
Exemplo n.º 8
0
    def _execute(self, options, args):
        """Apply mincss to the generated site.

        Collects every ``.html`` file under the configured output folder,
        runs them through a mincss ``Processor`` and overwrites each
        stylesheet the processor reports with its reduced CSS.  Stylesheets
        are matched by basename, so two CSS files sharing a name abort the
        run with exit status 1.  ``options`` and ``args`` are not used here.
        """
        output_folder = self.site.config['OUTPUT_FOLDER']
        if Processor is None:
            print('To use the mincss command,'
                  ' you have to install the "mincss" package.')
            return

        p = Processor(preserve_remote_urls=False)
        urls = []
        css_files = {}
        for root, _dirs, files in os.walk(output_folder):
            for f in files:
                url = os.path.join(root, f)
                if url.endswith('.css'):
                    fname = os.path.basename(url)
                    if fname in css_files:
                        print("You have two CSS files with the same name and that confuses me.")
                        sys.exit(1)
                    css_files[fname] = url
                if f.endswith('.html'):
                    urls.append(url)
        p.process(*urls)
        for link in p.links:
            fname = os.path.basename(link.href)
            print("===>", link.href, len(link.before), len(link.after))
            css = link.after
            # The file is opened in binary mode, so text must be encoded
            # explicitly; writing it as-is fails on Python 3 and falls back
            # to an implicit ASCII encode on Python 2.
            if not isinstance(css, bytes):
                css = css.encode('utf-8')
            # NOTE(review): a link whose basename was not found under the
            # output folder raises KeyError here -- confirm that is intended.
            with open(css_files[fname], 'wb+') as outf:
                outf.write(css)
Exemplo n.º 9
0
    def test_pseudo_selectors_hell(self):
        """Check which selectors and at-rules remain in three.html's first stylesheet."""
        page_url = 'file://' + os.path.join(HERE, 'three.html')
        processor = Processor(preserve_remote_urls=False)
        processor.process(page_url)
        # Only the first linked stylesheet is inspected.
        first_link = processor.links[0]
        reduced = first_link.after

        # (snippet, must it still be present?) -- asserted in this order.
        expectations = [
            ('a.three:hover', True),
            ('a.hundred:link', False),
            ('.container > a.one', True),
            ('.container > a.notused', False),
            ('input[type="button"]', False),
            ('input[type="search"]::-webkit-search-decoration', True),
            ('input[type="reset"]::-webkit-search-decoration', False),
            ('@media (max-width: 900px)', True),
            ('.container .two', True),
            ('a.four', False),
            ('::-webkit-input-placeholder', True),
            (':-moz-placeholder {', True),
            ('div::-moz-focus-inner', True),
            ('button::-moz-focus-inner', False),
            ('@-webkit-keyframes progress-bar-stripes', True),
            ('from {', True),
            # some day perhaps this can be untangled and parsed too
            ('@import url(other.css)', True),
        ]
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 10
0
    def _execute(self, options, args):
        """Apply mincss to the generated site.

        Collects every ``.html`` file under the configured output folder,
        runs them through a mincss ``Processor`` and overwrites each
        stylesheet the processor reports with its reduced CSS.  Stylesheets
        are matched by basename, so two CSS files sharing a name abort the
        run with exit status 1.  ``options`` and ``args`` are not used here.
        """
        output_folder = self.site.config['OUTPUT_FOLDER']
        if Processor is None:
            req_missing(['mincss'], 'use the "mincss" command')
            return

        p = Processor(preserve_remote_urls=False)
        urls = []
        css_files = {}
        for root, _dirs, files in os.walk(output_folder):
            for f in files:
                url = os.path.join(root, f)
                if url.endswith('.css'):
                    fname = os.path.basename(url)
                    if fname in css_files:
                        self.logger.error(
                            "You have two CSS files with the same name and that confuses me."
                        )
                        sys.exit(1)
                    css_files[fname] = url
                if f.endswith('.html'):
                    urls.append(url)
        p.process(*urls)
        for link in p.links:
            fname = os.path.basename(link.href)
            css = link.after
            # The file is opened in binary mode, so text must be encoded
            # explicitly; writing it as-is fails on Python 3 and falls back
            # to an implicit ASCII encode on Python 2.
            if not isinstance(css, bytes):
                css = css.encode('utf-8')
            # NOTE(review): a link whose basename was not found under the
            # output folder raises KeyError here -- confirm that is intended.
            with open(css_files[fname], 'wb+') as outf:
                outf.write(css)
Exemplo n.º 11
0
    def _execute(self, options, args):
        """Apply mincss to the generated site.

        Collects every ``.html`` file under the configured output folder
        (following symlinked directories), runs them through a mincss
        ``Processor`` and overwrites each stylesheet the processor reports
        with its reduced CSS.  Stylesheets are matched by basename, so two
        CSS files sharing a name abort the run with exit status 1.
        ``options`` and ``args`` are not used here.
        """
        output_folder = self.site.config['OUTPUT_FOLDER']
        if Processor is None:
            req_missing(['mincss'], 'use the "mincss" command')
            return

        p = Processor(preserve_remote_urls=False)
        urls = []
        css_files = {}
        for root, _dirs, files in os.walk(output_folder, followlinks=True):
            for f in files:
                url = os.path.join(root, f)
                if url.endswith('.css'):
                    fname = os.path.basename(url)
                    if fname in css_files:
                        self.logger.error("You have two CSS files with the same name and that confuses me.")
                        sys.exit(1)
                    css_files[fname] = url
                if f.endswith('.html'):
                    urls.append(url)
        p.process(*urls)
        for link in p.links:
            fname = os.path.basename(link.href)
            css = link.after
            # The file is opened in binary mode, so text must be encoded
            # explicitly; writing it as-is fails on Python 3 and falls back
            # to an implicit ASCII encode on Python 2.
            if not isinstance(css, bytes):
                css = css.encode('utf-8')
            # NOTE(review): a link whose basename was not found under the
            # output folder raises KeyError here -- confirm that is intended.
            with open(css_files[fname], 'wb+') as outf:
                outf.write(css)
Exemplo n.º 12
0
def run():
    """Process every entry of ``urls`` and print the reduced CSS of each link."""
    p = Processor()
    for url in urls:
        p.process(url)

    for each in p.links:
        # The parenthesised single-argument form behaves the same as the
        # Python 2 print statement and is also valid Python 3.
        print(each.after)
Exemplo n.º 13
0
 def test_respect_link_order(self):
     """three-links.html must report its links as two.css followed by three.css."""
     page_url = 'file://' + os.path.join(HERE, 'three-links.html')
     processor = Processor()
     processor.process(page_url)
     found = [link.href for link in processor.links]
     eq_(found, ['two.css', 'three.css'])
Exemplo n.º 14
0
 def test_before_after(self):
     """Check which :before / :after rules remain for before-after.html."""
     page_url = 'file://' + os.path.join(HERE, 'before-after.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     dropped_rule = 'ul li:after { content: "x"; }'
     kept_rule = 'ol li:before { content: "x"; }'
     ok_(dropped_rule not in reduced)
     ok_(kept_rule in reduced)
Exemplo n.º 15
0
 def test_complex_colons_in_selector_expression(self):
     """Rules whose attribute selector contains a colon remain for complex-selector.html."""
     page_url = 'file://' + os.path.join(HERE, 'complex-selector.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     kept_rules = (
         'a[href^="javascript:"] { color: pink; }',
         'a[href^="javascript:"]:after { content: "x"; }',
     )
     for rule in kept_rules:
         ok_(rule in reduced)
Exemplo n.º 16
0
 def test_complex_colons_in_selector_expression(self):
     """Both javascript: attribute-selector rules must remain for complex-selector.html."""
     page_url = 'file://' + os.path.join(HERE, 'complex-selector.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     kept_rules = (
         'a[href^="javascript:"] { color: pink; }',
         'a[href^="javascript:"]:after { content: "x"; }',
     )
     for rule in kept_rules:
         ok_(rule in reduced)
Exemplo n.º 17
0
 def test_before_after(self):
     """For before-after.html the ul li:after rule goes and ol li:before stays."""
     page_url = 'file://' + os.path.join(HERE, 'before-after.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     dropped_rule = 'ul li:after { content: "x"; }'
     kept_rule = 'ol li:before { content: "x"; }'
     ok_(dropped_rule not in reduced)
     ok_(kept_rule in reduced)
Exemplo n.º 18
0
def run(url):
    """Process *url* with mincss and print the elapsed time and CSS size totals.

    The totals cover every inline block and linked stylesheet the
    processor reports, measured with ``len()`` of the before/after CSS.
    """
    p = Processor()
    t0 = time.time()
    p.process(url)
    t1 = time.time()

    print("INLINES ".ljust(79, '-'))
    # Previously the totals were initialised to 0 and never updated, so
    # the summary below always reported 0.0Kb.
    blocks = list(p.inlines) + list(p.links)
    total_size_before = sum(len(each.before) for each in blocks)
    total_size_after = sum(len(each.after) for each in blocks)

    print(
        "TOOK:".ljust(20),
        "%.5fs" % (t1 - t0)
    )
    print(
        "TOTAL SIZE BEFORE:".ljust(20),
        "%.1fKb" % (total_size_before / 1024.0)
    )
    print(
        "TOTAL SIZE AFTER:".ljust(20),
        "%.1fKb" % (total_size_after / 1024.0)
    )
Exemplo n.º 19
0
    def test_non_ascii_html(self):
        """The inline CSS of eight.html is unicode and keeps its non-ASCII text."""
        page_url = "file://" + os.path.join(HERE, "eight.html")
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        is_text = isinstance(reduced, unicode)
        ok_(is_text)
        ok_(u"Varf\xf6r st\xe5r det h\xe4r?" in reduced)
Exemplo n.º 20
0
    def test_non_ascii_html(self):
        """The inline CSS of eight.html is unicode and keeps its non-ASCII text."""
        html = os.path.join(HERE, 'eight.html')
        url = 'file://' + html
        p = Processor()
        p.process(url)

        after = p.inlines[0].after
        # Dedicated assertions report the offending value on failure,
        # unlike assertTrue on an already-evaluated boolean.
        self.assertIsInstance(after, unicode)
        self.assertIn(u'Varf\xf6r st\xe5r det h\xe4r?', after)
Exemplo n.º 21
0
    def test_non_ascii_html(self):
        """eight.html's inline CSS must be unicode and still contain its non-ASCII text."""
        page_url = 'file://' + os.path.join(HERE, 'eight.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        is_text = isinstance(reduced, unicode)
        ok_(is_text)
        ok_(u'Varf\xf6r st\xe5r det h\xe4r?' in reduced)
Exemplo n.º 22
0
 def test_make_absolute_url(self):
     """Exercise Processor.make_absolute_url with a table of (base, href, expected)."""
     processor = Processor()
     cases = [
         ("http://www.com/", "./style.css", "http://www.com/style.css"),
         ("http://www.com", "./style.css", "http://www.com/style.css"),
         ("http://www.com", "//cdn.com/style.css", "http://cdn.com/style.css"),
         ("http://www.com/", "//cdn.com/style.css", "http://cdn.com/style.css"),
         ("http://www.com/", "/style.css", "http://www.com/style.css"),
         ("http://www.com/elsewhere", "/style.css", "http://www.com/style.css"),
         ("http://www.com/elsewhere/", "/style.css", "http://www.com/style.css"),
         ("http://www.com/elsewhere/", "./style.css", "http://www.com/elsewhere/style.css"),
         ("http://www.com/elsewhere", "./style.css", "http://www.com/style.css"),
     ]
     for base, href, expected in cases:
         eq_(processor.make_absolute_url(base, href), expected)
Exemplo n.º 23
0
    def test_complicated_keyframes(self):
        """six.html's inline CSS keeps balanced braces and the expected .pull-* rules."""
        page_url = "file://" + os.path.join(HERE, "six.html")
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count("{")
        closing = reduced.count("}")
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            (".pull-left", True),
            (".pull-right", True),
            (".pull-middle", False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 24
0
    def test_complicated_keyframes(self):
        """six.html's inline CSS keeps balanced braces and the expected .pull-* rules."""
        page_url = 'file://' + os.path.join(HERE, 'six.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('.pull-left', True),
            ('.pull-right', True),
            ('.pull-middle', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 25
0
    def test_complicated_keyframes(self):
        """Braces must balance and only the used .pull-* rules remain for six.html."""
        page_url = 'file://' + os.path.join(HERE, 'six.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('.pull-left', True),
            ('.pull-right', True),
            ('.pull-middle', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 26
0
def gencss(htmldir, htmlfiles):
    """Write the reduced CSS for each HTML file into ``../www/css/``.

    Each name in *htmlfiles* is joined onto *htmldir* and processed; the
    output file is named after the part of the HTML file name before its
    first dot, with a ``.css`` suffix.
    """
    cssdir = '../www/css/'
    p = Processor(optimize_lookup=True)
    for htmlfile in htmlfiles:
        p.process(join(htmldir, htmlfile))
        # The target path depends only on the HTML file, not on the link.
        cssfile = join(cssdir, htmlfile.split('.')[0]) + ".css"
        for css in p.links:
            data = css.after
            # The file is opened in binary mode, so text must be encoded
            # explicitly rather than written as-is.
            if not isinstance(data, bytes):
                data = data.encode('utf-8')
            with open(cssfile, 'wb') as fh:
                fh.write(data)
Exemplo n.º 27
0
    def test_duplicate_media_queries(self):
        """Two media queries that look exactly the same must not cause a failure.

        Admittedly a hack, written in an attempt to solve
        https://github.com/peterbe/mincss/issues/46
        """
        page_url = 'file://' + os.path.join(HERE, 'duplicate-media-queries.html')
        processor = Processor()
        processor.process(page_url)
        reduced = processor.inlines[0].after
        occurrences = reduced.count('@media screen and (min-width: 600px) {')
        eq_(occurrences, 2)
Exemplo n.º 28
0
    def test_double_classes(self):
        """five.html's stylesheet keeps balanced braces and the used compound-class rules."""
        page_url = 'file://' + os.path.join(HERE, 'five.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('input.span6', True),
            ('.uneditable-input.span9', True),
            ('.uneditable-{', False),
            ('.uneditable-input.span3', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 29
0
    def test_double_classes(self):
        """Braces must balance and only the used compound-class rules remain for five.html."""
        page_url = 'file://' + os.path.join(HERE, 'five.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('input.span6', True),
            ('.uneditable-input.span9', True),
            ('.uneditable-{', False),
            ('.uneditable-input.span3', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 30
0
    def test_double_classes(self):
        """five.html's stylesheet keeps balanced braces and the used compound-class rules."""
        page_url = "file://" + os.path.join(HERE, "five.html")
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        opening = reduced.count("{")
        closing = reduced.count("}")
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ("input.span6", True),
            (".uneditable-input.span9", True),
            (".uneditable-{", False),
            (".uneditable-input.span3", False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 31
0
    def test_duplicate_media_queries(self):
        """Identical-looking media queries must be handled without failing.

        This is something of a hack; it tries hard to solve
        https://github.com/peterbe/mincss/issues/46
        """
        page_url = 'file://' + os.path.join(HERE, 'duplicate-media-queries.html')
        processor = Processor()
        processor.process(page_url)
        reduced = processor.inlines[0].after
        occurrences = reduced.count('@media screen and (min-width: 600px) {')
        eq_(occurrences, 2)
Exemplo n.º 32
0
    def test_media_query_simple(self):
        """Check the comment, media query and rules remaining in four.html's stylesheet."""
        page_url = 'file://' + os.path.join(HERE, 'four.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('/* A comment */', True),
            ('@media (max-width: 900px) {', True),
            ('.container .two {', True),
            ('.container .nine {', False),
            ('a.four', False),
        )
        for snippet, kept in expectations:
            # The reduced CSS doubles as the failure message.
            ok_((snippet in reduced) is kept, reduced)
Exemplo n.º 33
0
    def test_media_query_simple(self):
        """Check the comment, media query and rules remaining in four.html's stylesheet."""
        page_url = "file://" + os.path.join(HERE, "four.html")
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ("/* A comment */", True),
            ("@media (max-width: 900px) {", True),
            (".container .two {", True),
            (".container .nine {", False),
            ("a.four", False),
        )
        for snippet, kept in expectations:
            # The reduced CSS doubles as the failure message.
            ok_((snippet in reduced) is kept, reduced)
Exemplo n.º 34
0
    def test_media_query_simple(self):
        """four.html's stylesheet keeps its comment, media query and the used rule only."""
        page_url = 'file://' + os.path.join(HERE, 'four.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.links[0].after
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('/* A comment */', True),
            ('@media (max-width: 900px) {', True),
            ('.container .two {', True),
            ('.container .nine {', False),
            ('a.four', False),
        )
        for snippet, kept in expectations:
            # The reduced CSS doubles as the failure message.
            ok_((snippet in reduced) is kept, reduced)
Exemplo n.º 35
0
def run(url):
    """Process *url* with mincss and print the elapsed time and CSS size totals.

    The totals cover every inline block and linked stylesheet the
    processor reports, measured with ``len()`` of the before/after CSS.
    """
    p = Processor()
    t0 = time.time()
    p.process(url)
    t1 = time.time()

    print("INLINES ".ljust(79, '-'))
    # Previously the totals were initialised to 0 and never updated, so
    # the summary below always reported 0.0Kb.
    blocks = list(p.inlines) + list(p.links)
    total_size_before = sum(len(each.before) for each in blocks)
    total_size_after = sum(len(each.after) for each in blocks)

    print("TOOK:".ljust(20), "%.5fs" % (t1 - t0))
    print("TOTAL SIZE BEFORE:".ljust(20),
          "%.1fKb" % (total_size_before / 1024.0))
    print("TOTAL SIZE AFTER:".ljust(20),
          "%.1fKb" % (total_size_after / 1024.0))
Exemplo n.º 36
0
    def test_preserve_remote_urls(self):
        """Check the url() references in nine.html's stylesheet with preserve_remote_urls on."""
        page_url = 'file://' + os.path.join(HERE, 'nine.html')
        processor = Processor(preserve_remote_urls=True)
        processor.process(page_url)

        reduced = processor.links[0].after
        ok_("url('http://www.google.com/north.png')" in reduced)
        south = 'file://' + HERE + '/deeper/south.png'
        ok_(('url("%s")' % south) in reduced)
        # since local file URLs don't have a domain, this is actually expected
        ok_('url("file:///east.png")' in reduced)
        west = 'file://' + HERE + '/west.png'
        ok_(('url("%s")' % west) in reduced)
Exemplo n.º 37
0
    def test_preserve_remote_urls(self):
        """Check the url() references in nine.html's stylesheet with preserve_remote_urls on."""
        page_url = "file://" + os.path.join(HERE, "nine.html")
        processor = Processor(preserve_remote_urls=True)
        processor.process(page_url)

        reduced = processor.links[0].after
        ok_("url('http://www.google.com/north.png')" in reduced)
        south = "file://" + HERE + "/deeper/south.png"
        ok_(('url("%s")' % south) in reduced)
        # since local file URLs don't have a domain, this is actually expected
        ok_('url("file:///east.png")' in reduced)
        west = "file://" + HERE + "/west.png"
        ok_(('url("%s")' % west) in reduced)
Exemplo n.º 38
0
    def test_ignore_annotations(self):
        """Check the comments and rules remaining in seven.html's inline CSS."""
        page_url = 'file://' + os.path.join(HERE, 'seven.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('/* Leave this comment as is */', True),
            ('/* Lastly leave this as is */', True),
            ('/* Also stick around */', True),
            ('/* leave untouched */', True),
            ('.north', True),
            ('.south', True),
            ('.east', False),
            ('.west', True),
            ('no mincss', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 39
0
    def test_ignore_annotations(self):
        """Check the comments and rules remaining in seven.html's inline CSS."""
        page_url = "file://" + os.path.join(HERE, "seven.html")
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count("{")
        closing = reduced.count("}")
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ("/* Leave this comment as is */", True),
            ("/* Lastly leave this as is */", True),
            ("/* Also stick around */", True),
            ("/* leave untouched */", True),
            (".north", True),
            (".south", True),
            (".east", False),
            (".west", True),
            ("no mincss", False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 40
0
    def test_ignore_annotations(self):
        """seven.html's inline CSS keeps its comments and all rules except .east."""
        page_url = 'file://' + os.path.join(HERE, 'seven.html')
        processor = Processor()
        processor.process(page_url)

        reduced = processor.inlines[0].after
        opening = reduced.count('{')
        closing = reduced.count('}')
        eq_(opening, closing)
        # (snippet, must it still be present?) -- asserted in this order.
        expectations = (
            ('/* Leave this comment as is */', True),
            ('/* Lastly leave this as is */', True),
            ('/* Also stick around */', True),
            ('/* leave untouched */', True),
            ('.north', True),
            ('.south', True),
            ('.east', False),
            ('.west', True),
            ('no mincss', False),
        )
        for snippet, kept in expectations:
            ok_((snippet in reduced) is kept)
Exemplo n.º 41
0
Arquivo: run.py Projeto: yifree/mincss
def run(args):
    """Process ``args.url`` and write each linked stylesheet to ``args.outputdir``.

    Inline blocks are printed before and after processing.  For every link
    two UTF-8 files are written: the reduced CSS under the last path
    segment of its href, and the original CSS under the same name prefixed
    with ``before_``.  Always returns 0.
    """
    options = {'debug': args.verbose}
    # An explicit phantomjs path wins over the plain on/off flag.
    phantomjs = args.phantomjs_path or bool(args.phantomjs)
    if phantomjs:
        options['phantomjs'] = phantomjs
    processor = Processor(**options)
    started = time.time()
    processor.process(args.url)
    finished = time.time()
    print("TOTAL TIME ", finished - started)
    for block in processor.inlines:
        print("ON", block.url)
        print("AT line", block.line)
        print("BEFORE ".ljust(79, '-'))
        print(block.before)
        print("AFTER ".ljust(79, '-'))
        print(block.after)
        print()

    target_dir = args.outputdir
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)
    for sheet in processor.links:
        print("FOR", sheet.href)
        basename = sheet.href.split('/')[-1]
        outputs = (
            (basename, sheet.after),
            ('before_' + basename, sheet.before),
        )
        for filename, content in outputs:
            with codecs.open(os.path.join(target_dir, filename), 'w', 'utf-8') as out:
                out.write(content)
        print("Files written to", target_dir)
        print()
        size_before = len(sheet.before)
        size_after = len(sheet.after)
        print(
            '(from %d to %d saves %d)' %
            (size_before, size_after, size_before - size_after)
        )

    return 0
Exemplo n.º 42
0
 def test_nth_child(self):
     """Every pseudo-class rule in nth-child.html's inline CSS must remain."""
     page_url = 'file://' + os.path.join(HERE, 'nth-child.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     kept_rules = (
         # These mouse related ones should stay, even though they're
         # currently NOT being acted upon with some input device.
         'a.actually:hover { font-weight: bold; }',
         'a.actually:visited { font-weight: bold; }',
         'a.actually:link { font-weight: bold; }',
         'a.actually:focus { font-weight: bold; }',
         'a.actually:active { font-weight: bold; }',
         # the other selectors with : in them should also stay
         'div > :first-child { color: pink; }',
         'div > :last-child { color: brown; }',
         'div > :not(p) { color: blue; }',
         'div > :nth-child(2) { color: red; }',
     )
     for rule in kept_rules:
         ok_(rule in reduced)
Exemplo n.º 43
0
 def test_nth_child(self):
     """All listed pseudo-class rules must remain in nth-child.html's inline CSS."""
     page_url = 'file://' + os.path.join(HERE, 'nth-child.html')
     processor = Processor()
     processor.process(page_url)
     reduced = processor.inlines[0].after
     kept_rules = (
         # These mouse related ones should stay, even though they're
         # currently NOT being acted upon with some input device.
         'a.actually:hover { font-weight: bold; }',
         'a.actually:visited { font-weight: bold; }',
         'a.actually:link { font-weight: bold; }',
         'a.actually:focus { font-weight: bold; }',
         'a.actually:active { font-weight: bold; }',
         # the other selectors with : in them should also stay
         'div > :first-child { color: pink; }',
         'div > :last-child { color: brown; }',
         'div > :not(p) { color: blue; }',
         'div > :nth-child(2) { color: red; }',
     )
     for rule in kept_rules:
         ok_(rule in reduced)
Exemplo n.º 44
0
def run():
    """Process URL and print the reduced CSS of every link and inline block.

    Section and entry headers are emitted as CSS comments.  Every print
    call uses the single-argument parenthesised form, which behaves the
    same as a Python 2 print statement and as the Python 3 function.
    """
    p = Processor()
    p.process(URL)

    # Closed CSS comment padded to 77 columns, same shape as the INLINES
    # header below (this line used to pad to 797 and omit the "*/").
    print("/* LINKS ".ljust(77, '-') + "*/")
    for each in p.links:
        print("/* On href %s */" % each.href)
        print("")
        print(each.after)
        print("")
    print("")

    print("/* INLINES ".ljust(77, '-') + "*/")
    for each in p.inlines:
        print("/* On line %s */" % each.line)
        print("")
        print(each.after)
        print("")
    print("")
Exemplo n.º 45
0
 def test_just_one_link(self):
     """two.html's first link is two.css, shrinks, and starts with the expected lines."""
     page_url = 'file://' + os.path.join(HERE, 'two.html')
     processor = Processor()
     processor.process(page_url)
     # two.html only has 1 link CSS ref
     first = processor.links[0]
     eq_(first.href, 'two.css')
     ok_(len(first.after) < len(first.before))
     actual_lines = first.after.splitlines()
     expected_lines = '''
         body, html { margin: 0; }
         h1, h2, h3 { text-align: center; }
         h3 { font-family: serif; }
         h2 { color:red }
     '''.strip().splitlines()
     # compare line by line, ignoring surrounding whitespace
     for index, expected in enumerate(expected_lines):
         eq_(expected.strip(), actual_lines[index].strip())
Exemplo n.º 46
0
 def test_just_one_link(self):
     """The first link of two.html is two.css, gets smaller, and matches line by line."""
     page_url = 'file://' + os.path.join(HERE, 'two.html')
     processor = Processor()
     processor.process(page_url)
     # two.html only has 1 link CSS ref
     first = processor.links[0]
     eq_(first.href, 'two.css')
     ok_(len(first.after) < len(first.before))
     actual_lines = first.after.splitlines()
     expected_lines = '''
         body, html { margin: 0; }
         h1, h2, h3 { text-align: center; }
         h3 { font-family: serif; }
         h2 { color:red }
     '''.strip().splitlines()
     # compare line by line, ignoring surrounding whitespace
     for index, expected in enumerate(expected_lines):
         eq_(expected.strip(), actual_lines[index].strip())
Exemplo n.º 47
0
    def test_download_with_phantomjs(self):
        """one.html processed with PHANTOMJS yields the expected reduced inline CSS."""
        page_url = "file://" + os.path.join(HERE, "one.html")
        processor = Processor(
            phantomjs=PHANTOMJS,
            phantomjs_options={"cookies-file": "bla"},
        )
        processor.process(page_url)
        # one.html has a single block of inline CSS, starting on line 7
        block = processor.inlines[0]
        actual_lines = block.after.strip().splitlines()
        eq_(block.line, 7)
        ok_(len(block.after) < len(block.before))

        expected_lines = """
            h1, h2, h3 { text-align: center; }
            h3 { font-family: serif; }
            h2 { color:red }
        """.strip().splitlines()
        # compare line by line, ignoring surrounding whitespace
        for index, expected in enumerate(expected_lines):
            eq_(expected.strip(), actual_lines[index].strip())
Exemplo n.º 48
0
    def test_just_inline(self):
        """one.html's inline CSS block starts on line 7 and is reduced as expected."""
        page_url = 'file://' + os.path.join(HERE, 'one.html')
        processor = Processor()
        processor.process(page_url)
        # one.html has a single block of inline CSS, starting on line 7
        block = processor.inlines[0]
        actual_lines = block.after.strip().splitlines()
        eq_(block.line, 7)
        ok_(len(block.after) < len(block.before))

        expected_lines = '''
            h1, h2, h3 { text-align: center; }
            h3 { font-family: serif; }
            h2 { color:red }
        '''.strip().splitlines()
        # compare line by line, ignoring surrounding whitespace
        for index, expected in enumerate(expected_lines):
            eq_(expected.strip(), actual_lines[index].strip())
Exemplo n.º 49
0
    def test_download_with_phantomjs(self):
        """one.html processed with PHANTOMJS yields the expected reduced inline CSS."""
        page_url = 'file://' + os.path.join(HERE, 'one.html')
        phantomjs_options = {'cookies-file': 'bla'}
        processor = Processor(phantomjs=PHANTOMJS,
                              phantomjs_options=phantomjs_options)
        processor.process(page_url)
        # one.html has a single block of inline CSS, starting on line 7
        block = processor.inlines[0]
        actual_lines = block.after.strip().splitlines()
        eq_(block.line, 7)
        ok_(len(block.after) < len(block.before))

        expected_lines = '''
            h1, h2, h3 { text-align: center; }
            h3 { font-family: serif; }
            h2 { color:red }
        '''.strip().splitlines()
        # compare line by line, ignoring surrounding whitespace
        for index, expected in enumerate(expected_lines):
            eq_(expected.strip(), actual_lines[index].strip())
Exemplo n.º 50
0
def run(args):
    """Process ``args.url`` and write the before/after CSS to disk.

    Reads ``verbose``, ``phantomjs_path``, ``phantomjs``, ``url`` and
    ``outputdir`` from ``args``.  Every inline block is printed to stdout;
    every linked stylesheet is written to ``args.outputdir`` twice: the
    processed CSS under the stylesheet's own file name and the original
    CSS under the same name prefixed with ``before_``.

    Returns 0.
    """
    options = {'debug': args.verbose}
    # an explicit executable path takes precedence over the plain flag
    if args.phantomjs_path:
        options['phantomjs'] = args.phantomjs_path
    elif args.phantomjs:
        options['phantomjs'] = True
    p = Processor(**options)
    t0 = time.time()
    p.process(args.url)
    t1 = time.time()
    print('TOTAL TIME ', t1 - t0)
    for inline in p.inlines:
        print('ON', inline.url)
        print('AT line', inline.line)
        print('BEFORE '.ljust(79, '-'))
        print(inline.before)
        print('AFTER '.ljust(79, '-'))
        print(inline.after)
        print()

    output_dir = args.outputdir
    if not os.path.isdir(output_dir):
        # makedirs (not mkdir) so a nested output path such as "out/css"
        # works even when its parent does not exist yet
        os.makedirs(output_dir)
    for link in p.links:
        print('FOR', link.href)
        orig_name = link.href.split('/')[-1]
        # Write as UTF-8 explicitly: the default encoding of io.open() is
        # locale dependent and may fail on non-ASCII characters in the CSS.
        after_path = os.path.join(output_dir, orig_name)
        with io.open(after_path, 'w', encoding='utf-8') as f:
            f.write(link.after)
        before_path = os.path.join(output_dir, 'before_' + orig_name)
        with io.open(before_path, 'w', encoding='utf-8') as f:
            f.write(link.before)
        print('Files written to', output_dir)
        print()
        print(
            '(from %d to %d saves %d)' %
            (len(link.before), len(link.after),
             len(link.before) - len(link.after))
        )

    return 0
Exemplo n.º 51
0
 def test_one_link_two_different_pages(self):
     """Process two.html and two_half.html together and check two.css.

     The first collected stylesheet must be ``two.css``, must be shorter
     after processing than before, and must begin with the expected rules.
     """
     first_url = 'file://' + os.path.join(HERE, 'two.html')
     second_url = 'file://' + os.path.join(HERE, 'two_half.html')
     processor = Processor()
     processor.process(first_url, second_url)

     # two.html references a single external stylesheet
     stylesheet = processor.links[0]
     eq_(stylesheet.href, 'two.css')
     ok_(len(stylesheet.after) < len(stylesheet.before))
     actual_lines = stylesheet.after.splitlines()

     # compare line by line
     expect = '''
         body, html { margin: 0; }
         h1, h2, h3 { text-align: center; }
         h3 { font-family: serif; }
         .foobar { delete:me }
         .foobar, h2 { color:red }
     '''
     wanted_lines = expect.strip().splitlines()
     for index, wanted in enumerate(wanted_lines):
         eq_(wanted.strip(), actual_lines[index].strip())
Exemplo n.º 52
0
 def test_html_with_totally_empty_style_tag(self):
     """Processing one-3.html must yield no inline blocks at all."""
     page_url = 'file://' + os.path.join(HERE, 'one-3.html')
     processor = Processor()
     processor.process(page_url)
     eq_(processor.inlines, [])
Exemplo n.º 53
0
 def test_make_absolute_url(self):
     """Create a ``Processor``; this excerpt contains no assertions."""
     # NOTE(review): the body looks truncated here - only the processor
     # construction is visible.
     p = Processor()
Exemplo n.º 54
0
 def test_no_mincss_inline(self):
     """The first inline block of no-mincss-inline.html is left unchanged."""
     page_url = 'file://' + os.path.join(HERE, 'no-mincss-inline.html')
     processor = Processor()
     processor.process(page_url)
     first_block = processor.inlines[0]
     eq_(first_block.before, first_block.after)
Exemplo n.º 55
0
 def test_ignore_inline(self):
     """Processing ignore-inline.html must not collect any inline block."""
     page_url = 'file://' + os.path.join(HERE, 'ignore-inline.html')
     processor = Processor()
     processor.process(page_url)
     collected = processor.inlines
     assert not collected
Exemplo n.º 56
0
def process(urls):
    """Run a fresh ``Processor`` over all of ``urls`` and return it.

    ``urls`` is unpacked, so each item is passed as a separate positional
    argument to ``Processor.process``.
    """
    processor = Processor()
    processor.process(*urls)
    return processor
Exemplo n.º 57
0
def proxy(path):
    """Download a page, run mincss over it and return the rewritten HTML.

    ``path`` is the target URL; ``http://`` is prepended when it carries no
    scheme, and the query string of the incoming request is appended.  A
    request for ``favicon.ico`` is answered with ``abort(404)``.

    The returned HTML differs from the downloaded one as follows:

    * every inline CSS block is replaced by its processed version;
    * every stylesheet ``<link>`` known to the processor is written to a
      dated directory below ``CACHE_DIR`` and the link is pointed at
      ``/cache/...``;
    * ``src`` of ``<img>``/``<script>`` is made absolute;
    * root-relative ``<a href>`` values are routed back through this proxy;
    * when the ``MINCSS_STATS`` request argument is set, a stats summary is
      injected right after the opening ``<body>`` tag.

    Returns the doctype reported by the parser (if any), a newline and the
    serialized HTML.
    """
    if path == 'favicon.ico':
        abort(404)
    url = path
    if '://' not in path:
        url = 'http://' + url

    query = urlparse.urlparse(request.url).query
    if query:
        url += '?%s' % query
    logging.info('Downloading %s', url)
    t0 = time.time()
    html = download(url)
    t1 = time.time()
    # single-argument print(...) behaves the same on Python 2 and 3
    print("%.4f seconds to download" % (t1 - t0))

    p = Processor(debug=False, optimize_lookup=True)
    # since we've already downloaded the HTML, feed it in directly
    t0 = time.time()
    p.process_html(html, url)
    p.process()
    t1 = time.time()
    # measure from before process_html() so the figure really covers
    # "parse and process"
    print("%.4f seconds to parse and process" % (t1 - t0))

    collect_stats = request.args.get('MINCSS_STATS', False)
    stats = []
    css_url_regex = re.compile(r'url\(([^\)]+)\)')

    def css_url_replacer(match, href=None):
        """Return the matched ``url(...)`` with an absolute target.

        ``href`` is the location of the CSS the match came from; relative
        references are resolved against it (itself resolved against the
        page URL).  Data images, references that already contain a scheme
        and the ``url(.)`` IE hack are returned unchanged.
        """
        filename = match.groups()[0]
        bail = match.group()

        if ((filename.startswith('"') and filename.endswith('"'))
                or (filename.startswith("'") and filename.endswith("'"))):
            filename = filename[1:-1]
        if 'data:image' in filename or '://' in filename:
            return bail
        if filename == '.':
            # this is a known IE hack in CSS
            return bail

        # Relative URLs inside a stylesheet are relative to the stylesheet
        # itself, not to the page that links it.
        base = urlparse.urljoin(url, href) if href else url
        return 'url("%s")' % urlparse.urljoin(base, filename)

    for number, each in enumerate(p.inlines, 1):
        # this should be using CSSSelector instead
        new_inline = css_url_regex.sub(
            functools.partial(css_url_replacer, href=url), each.after)
        stats.append(('inline %s' % number, each.before, each.after))
        html = html.replace(each.before, new_inline)

    parser = etree.HTMLParser()
    stripped = html.strip()
    tree = etree.fromstring(stripped, parser).getroottree()
    page = tree.getroot()

    # Doctype as reported by the parsed tree; it is prepended to the
    # output below only when non-empty.
    was_doctype = tree.docinfo.doctype

    links = {x.href: x for x in p.links}

    for link in CSSSelector('link')(page):
        href = link.attrib.get('href')
        if not href:
            # e.g. <link rel="stylesheet"> without href: nothing to rewrite
            continue
        is_stylesheet = (
            link.attrib.get('rel', '') == 'stylesheet'
            or href.lower().split('?')[0].endswith('.css')
        )
        if not is_stylesheet:
            continue
        processed = links.get(href)
        if processed is None:
            # the processor did not collect this stylesheet; leave the
            # tag untouched instead of failing the whole request
            continue

        # md5 needs bytes, so encode the text key explicitly
        hash_ = hashlib.md5((url + href).encode('utf-8')).hexdigest()[:7]
        now = datetime.date.today()
        destination_dir = os.path.join(
            CACHE_DIR,
            str(now.year),
            str(now.month),
            str(now.day),
        )
        mkdir(destination_dir)
        stats.append((href, processed.before, processed.after))
        new_css = css_url_regex.sub(
            functools.partial(css_url_replacer, href=href),
            processed.after)
        destination = os.path.join(destination_dir, hash_ + '.css')

        with codecs.open(destination, 'w', 'utf-8') as f:
            f.write(new_css)

        link.attrib['href'] = ('/cache%s' %
                               destination.replace(CACHE_DIR, ''))

    for img in CSSSelector('img, script')(page):
        if 'src' in img.attrib:
            img.attrib['src'] = urlparse.urljoin(url, img.attrib['src'])

    for a in CSSSelector('a')(page):
        if 'href' not in a.attrib:
            continue
        href = a.attrib['href']

        if ('://' in href or href.startswith('#')
                or href.startswith('javascript:')):
            continue

        if href.startswith('/'):
            # route root-relative links back through this proxy
            a.attrib['href'] = (
                '/' +
                urlparse.urljoin(url, a.attrib['href']).replace('http://', ''))
        if collect_stats:
            a.attrib['href'] = add_collect_stats_qs(a.attrib['href'],
                                                    collect_stats)

    html = etree.tostring(page, method='html')
    if collect_stats:
        html = re.sub('<body[^>]*>',
                      lambda m: m.group() + summorize_stats_html(stats),
                      html,
                      flags=re.I | re.M,
                      count=1)

    return (was_doctype or '') + '\n' + html
Exemplo n.º 58
0
 def test_ignore_link(self):
     """Processing ignore-link.html must not collect any linked stylesheet."""
     page_url = 'file://' + os.path.join(HERE, 'ignore-link.html')
     processor = Processor()
     processor.process(page_url)
     collected = processor.links
     assert not collected
Exemplo n.º 59
0
 def test_make_absolute_url(self):
     """Pin ``Processor.make_absolute_url`` for several base/href pairs.

     Each check passes a base URL and an href and compares the result
     with the expected absolute URL.
     """
     p = Processor()
     # './' href against the bare host: same result with or without a
     # trailing slash on the base
     eq_(
         p.make_absolute_url('http://www.com/', './style.css'),
         'http://www.com/style.css'
     )
     eq_(
         p.make_absolute_url('http://www.com', './style.css'),
         'http://www.com/style.css'
     )
     # scheme-less '//host/...' href: expected to take the base's scheme
     eq_(
         p.make_absolute_url('http://www.com', '//cdn.com/style.css'),
         'http://cdn.com/style.css'
     )
     eq_(
         p.make_absolute_url('http://www.com/', '//cdn.com/style.css'),
         'http://cdn.com/style.css'
     )
     # root-relative '/...' href: expected at the host root whatever the
     # path of the base is
     eq_(
         p.make_absolute_url('http://www.com/', '/style.css'),
         'http://www.com/style.css'
     )
     eq_(
         p.make_absolute_url('http://www.com/elsewhere', '/style.css'),
         'http://www.com/style.css'
     )
     eq_(
         p.make_absolute_url('http://www.com/elsewhere/', '/style.css'),
         'http://www.com/style.css'
     )
     # './' href against a base with a path: with a trailing slash the
     # result stays below 'elsewhere/' ...
     eq_(
         p.make_absolute_url('http://www.com/elsewhere/', './style.css'),
         'http://www.com/elsewhere/style.css'
     )
     # ... without it the expected result is at the host root
     eq_(
         p.make_absolute_url('http://www.com/elsewhere', './style.css'),
         'http://www.com/style.css'
     )