示例#1
0
def htmlprep(file):
    """Load *file* as an HTML DOM and run the width/span fix-up passes on it.

    The document is built with ``htmldom``; the elements selected through
    ``tagsWithAttribute('width')`` are arranged by ``htmlgrid`` and handed to
    ``fixwidths``, after which ``fixspans`` and ``notrwidth`` are applied to
    the whole document.  The (modified) DOM is returned.
    """
    dom = htmldom(file)
    # Materialise the traversal so htmlgrid receives a real list.
    width_tags = [node for node in tagsinList(dom, tagsWithAttribute('width'))]
    grid = htmlgrid(dom, width_tags)
    fixwidths(grid)
    fixspans(dom)
    notrwidth(dom)
    return dom
示例#2
0
def getcss(doc):
    """Collect the CSS rules used by *doc* into a dictionary.

    Two passes are made over the document:

    1. ``link`` and ``style`` elements.  A ``link`` whose ``rel`` is
       ``stylesheet`` is read (over the network when its href contains
       ``http:`` or ``https:``, otherwise from the local path the href maps
       to) and split into rules.  For ``style`` elements the data of every
       text and comment child is split into rules.
    2. Elements carrying a ``style`` attribute.  Each declaration set not
       already present among the collected rules is stored under a
       generated ``.ecssN`` key.

    Returns a dict mapping each name yielded by ``cssplit1`` (or generated
    ``.ecssN`` key) to a ``{command: value}`` dict built from ``cssplit2``.
    """
    import urllib

    cssdict, count = dict(), 1

    def fullsplit(csstring):
        # Later definitions of the same name replace earlier ones.
        for classname, cmds in cssplit1(csstring):
            subcss = dict()
            for cmd, value in cssplit2(cmds):
                subcss[cmd] = value
            cssdict[classname] = subcss

    def readsheet(url):
        # Read a stylesheet and always release the underlying handle.
        if url.find('http:') != -1 or url.find('https:') != -1:
            source = urllib.urlopen(url)
        else:
            source = open(urllib.url2pathname(url))
        try:
            return source.read()
        finally:
            source.close()

    for tag in tagsinList(doc, ['style', 'link']):
        if isname(tag, 'link'):
            if attget(tag, 'rel') == 'stylesheet':
                url = attget(tag, 'href')
                # NOTE(review): assumes attget returns None/'' for a missing
                # attribute; a stylesheet link without href is skipped.
                if not url:
                    continue
                fullsplit(readsheet(url))
        elif isname(tag, 'style'):
            for child in tag.childNodes:
                if istext(child) or iscomment(child):
                    fullsplit(child.data)

    for tag in tagsbyAttr(doc, 'style'):
        subcss = dict()
        for cmd, value in cssplit2(attget(tag, 'style')):
            subcss[cmd] = value
        if subcss not in cssdict.values():
            # Never overwrite a rule that already uses the generated name.
            key = ''.join(['.ecss', str(count)])
            while key in cssdict:
                count += 1
                key = ''.join(['.ecss', str(count)])
            cssdict[key] = subcss
            count += 1
    return cssdict
示例#3
0
def cssifier(doc, **kwargs):
    """Move presentational HTML attributes of *doc* into CSS.

    For every element yielded by ``tagsbyNS(htmlns, doc)`` whose local name
    has an entry in ``htmldefs.html2css``, the attributes named in that entry
    are translated into CSS declarations and removed from the element.  The
    declarations are then merged with the rules stored for the element's
    ``class`` (looked up as ``.<class>`` in the result of ``getcss(doc)``)
    and with its inline ``style`` attribute, later sources overriding earlier
    ones.  The merged rule set is written back as a ``class`` reference (when
    ``embed`` or ``file`` is given) or as a ``style`` attribute (otherwise).

    Keyword arguments:
      embed -- if this key is present (its value is not inspected) the
               collected rules are placed in a ``style`` element in ``head``.
      file  -- path of a stylesheet to write; a ``link`` element pointing at
               it is added to ``head``.  Only used when ``embed`` is absent.

    When ``embed`` or ``file`` is given, existing ``style`` elements and
    stylesheet ``link`` elements are removed from the document first.

    The document is modified in place; nothing is returned.
    """
    from htmldefs import html2css

    def pixelize(value):
        # Non-zero integer values get a 'px' unit; anything else is kept as is.
        try:
            if int(value):
                return ''.join([value, 'px'])
        except ValueError:
            pass
        return value

    def insertcss(tag, attrmap):
        """Return (declarations, consumed attribute keys) for one element.

        A mapping entry may be:
          * a tuple of declarations -- every item is emitted, items containing
            '%s' receive the attribute value;
          * a dict -- the attribute value selects the declaration;
          * a string containing '%s' -- formatted with the attribute value.
        """
        declarations, consumed = list(), list()
        attributes = tag.attributes
        for key in attributes:
            # Attribute keys are indexed with [1]; presumably they are
            # (namespace, local name) pairs -- verify against the DOM in use.
            mapping = attrmap.get(key[1])
            if not mapping:
                continue
            if isinstance(mapping, tuple):
                # NOTE(review): one-element tuples used to evaluate
                # ``tuple % value`` (always a TypeError); they are now handled
                # like any other tuple.
                value = attributes.get(key).value
                for item in mapping:
                    if item.find('%s') != -1:
                        declarations.append(item % pixelize(value))
                    else:
                        declarations.append(item)
                # Record the attribute once, however many items it produced,
                # so that it is not deleted twice.
                consumed.append(key)
            elif isinstance(mapping, dict):
                declaration = mapping.get(attributes.get(key).value)
                if declaration:
                    declarations.append(declaration)
                    consumed.append(key)
            elif mapping.find('%s') != -1:
                value = attributes.get(key).value
                declarations.append(mapping % pixelize(value))
                consumed.append(key)
        return declarations, consumed

    def recss(cdict):
        return '; '.join(['%s: %s' % (cmd, value)
                          for cmd, value in cdict.items()])

    embed = 'embed' in kwargs
    cssfile = kwargs.get('file')
    css, count = getcss(doc), 1
    # Do not call this list ``tags``: the module-level ``tags`` helper is
    # needed further down to locate the head element.
    elements = list(tagsbyNS(htmlns, doc))

    for tag in elements:
        if tag.localName not in html2css:
            continue
        attrmap = html2css.get(tag.localName)
        if not attrmap:
            continue
        tcss, consumed = insertcss(tag, attrmap)
        for key in consumed:
            del tag.attributes[key]
        if not tcss:
            continue

        scss = dict()
        for cmd, value in cssplit2('; '.join(tcss)):
            scss[cmd] = value
        if hasatt(tag, 'class'):
            # A class without a collected rule contributes nothing.
            scss.update(css.get(''.join(['.', attget(tag, 'class')])) or {})
            attdel(tag, 'class')
        if hasatt(tag, 'style'):
            # Parse the inline declarations directly, as getcss does.
            for cmd, value in cssplit2(attget(tag, 'style')):
                scss[cmd] = value
            attdel(tag, 'style')

        # Reuse an existing rule with identical declarations if there is one.
        # NOTE(review): any key qualifies, not only '.class' keys.
        classvalue = None
        for name, rules in css.items():
            if rules == scss:
                classvalue = name.strip('.')
        if classvalue is None:
            # Skip generated names that are already taken.
            classvalue = ''.join(['css', str(count)])
            while ''.join(['.', classvalue]) in css:
                count += 1
                classvalue = ''.join(['css', str(count)])
            css[''.join(['.', classvalue])] = scss
            count += 1

        if embed or cssfile:
            attset(tag, 'class', classvalue)
        else:
            attset(tag, 'style', recss(scss))

    if not (embed or cssfile):
        return

    cssgather = ['%s {%s}' % (name, recss(css.get(name))) for name in css]
    cssgather.sort()
    cssmaster = '\n'.join(cssgather)

    # Snapshot the traversal before removing nodes from the tree.
    for node in list(tagsinList(doc, ['style', 'link'])):
        if isname(node, 'link'):
            if hasatt(node, 'rel') and attget(node, 'rel') == 'stylesheet':
                node.parentNode.removeChild(node)
        elif isname(node, 'style'):
            node.parentNode.removeChild(node)

    if embed:
        style = doc.createElementNS(htmlns, 'style')
        style.appendChild(doc.createComment(cssmaster))
    else:
        import urllib
        out = open(cssfile, 'wb')
        try:
            out.write(cssmaster)
        finally:
            out.close()
        style = doc.createElementNS(htmlns, 'link')
        attset(style, 'rel', 'stylesheet')
        attset(style, 'href', urllib.pathname2url(cssfile))
    attset(style, 'type', 'text/css')
    list(tags(doc, htmlns, 'head'))[0].appendChild(style)