示例#1
0
def getcss(doc):
    '''Gathers the CSS rules used by a document into a single dictionary

    Arguments:
    doc -- document node searched for style/link tags and style attributes

    Rules come from three places: stylesheets referenced by
    <link rel="stylesheet">, the text/comment children of <style> tags, and
    inline style attributes.  Stylesheet rules are stored under the name
    cssplit1 yields for them; each distinct inline style is stored under a
    generated '.ecssN' key.

    Returns a dictionary mapping rule name -> {command: value}'''
    import urllib

    cssdict, count = dict(), 1

    def fullsplit(csstring):
        '''Splits a stylesheet string into rules and records them in cssdict'''
        for classname, cmds in cssplit1(csstring):
            subcss = dict()
            for cmd, value in cssplit2(cmds): subcss[cmd] = value
            cssdict[classname] = subcss

    def readsheet(url):
        '''Returns the text of a linked stylesheet, closing the handle after
        reading (the handle was previously left open)'''
        # NOTE(review): only 'http:' is treated as remote; any other href is
        # converted to a local path -- confirm whether https should be fetched
        if url.find('http:') != -1: source = urllib.urlopen(url)
        else: source = open(urllib.url2pathname(url))
        try:
            return source.read()
        finally:
            source.close()

    for tag in tagsinList(doc, ['style', 'link']):
        if isname(tag, 'link'):
            if attget(tag, 'rel') == 'stylesheet':
                fullsplit(readsheet(attget(tag, 'href')))
        elif isname(tag, 'style'):
            for i in tag.childNodes:
                if istext(i) or iscomment(i): fullsplit(i.data)
    for tag in tagsbyAttr(doc, 'style'):
        subcss = dict()
        for cmd, value in cssplit2(attget(tag, 'style')): subcss[cmd] = value
        # Identical inline styles share one generated rule
        if subcss not in cssdict.values():
            cssdict[''.join(['.ecss', str(count)])] = subcss
            count += 1
    return cssdict
示例#2
0
文件: som.py 项目: lcrees/psilib
        def _getByName(self, childList, name):
            '''Returns the first node whose name attribute equals name

            Arguments:
            childList -- nodes to search, in order
            name -- value compared with each node's u'name' attribute

            Returns None when no node matches'''
            for candidate in childList:
                if attget(candidate, u'name') != name: continue
                return candidate
            return None
示例#3
0
def fixspans(doc):
    '''Repairs rowspan and colspan attributes on every table in a document

    Arguments:
    doc -- document node whose tables are processed in place

    For each table the function:
    1. clamps rowspan to the number of tr children and colspan to the
       largest number of td children found in one row
    2. builds a grid (one list of td nodes per tr) and inserts cells that
       span rows into the rows they reach
    3. shrinks colspan values that exceed what the grid provides and removes
       colspan attributes that end up <= 1

    Returns None'''
    tables = list(tags(doc, htmlns, 'table'))
    for table in tables:
        # Row count and widest row bound the legal span values
        trcount, tdcount = 0, 0
        for tr in table.childNodes:
            if tr.localName == 'tr': trcount += 1
        for tr in table.childNodes:
            count = 0
            for td in tr.childNodes:
                if td.localName == 'td': count += 1
            if count > tdcount: tdcount = count
        for tr in table.childNodes:
            for td in tr.childNodes:
                if iselement(td):
                    # NOTE(review): a cell carrying both attributes only has
                    # its rowspan clamped because of the elif -- confirm
                    if hasatt(td, 'rowspan'):
                        if int(attget(td, 'rowspan')) > trcount:
                            attset(td, 'rowspan', str(trcount))
                    elif hasatt(td, 'colspan'):
                        if int(attget(td, 'colspan')) > tdcount:
                            attset(td, 'colspan', str(tdcount))
        # One list of td nodes per tr that has any; empty rows are skipped
        grid, temp = [], []
        for tr in table.childNodes:
            if iselement(tr) and isname(tr, 'tr'):
                if len(temp):
                    grid.append(temp)
                    temp = []
                for td in tr.childNodes:
                    if iselement(td) and isname(td, 'td'): temp.append(td)
        if len(temp): grid.append(temp)
        for td in list(tagsbyAttr(table, 'rowspan')):
            # Walk the siblings following the cell's row.  This replaces an
            # eval()-built '.nextSibling' chain that raised AttributeError
            # once a sibling was None and more steps were still requested.
            trs, row = [], td.parentNode
            for x in range(int(attget(td, 'rowspan'))-1):
                row = row.nextSibling
                if row is None: break
                if row: trs.append(row)
            # Column of the cell in its own row; reset per cell so a value
            # left over from an earlier cell is never reused
            loc = None
            for nodelist in grid:
                if td in nodelist: loc = nodelist.index(td)
                if loc is None: continue
                for tr in trs:
                    if tr.firstChild in nodelist: nodelist.insert(loc, td)
        for nodelist in grid:
            for td in nodelist:
                if hasatt(td, 'colspan'):
                    index = nodelist.index(td)
                    colspan = int(attget(td, 'colspan'))
                    offset, largest = len(nodelist[:index]), 0
                    for nlist in grid:
                        # NOTE(review): offset equals index, so for a cell in
                        # the first column the slice [0:-0] is empty and the
                        # colspan is never reduced -- confirm this is intended
                        actual = len(nlist[index:-offset])
                        if colspan > actual:
                            if actual > largest:
                                count = 0
                                for i in nlist[index:-offset]:
                                    if hasatt(i, 'colspan'):
                                        count += int(attget(i, 'colspan'))
                                if count != colspan: largest = actual
                    if largest: attset(td, 'colspan', str(largest))
                    if int(attget(td, 'colspan')) <= 1: attdel(td, 'colspan')
示例#4
0
def htmlgrid(doc, structure):
    '''Arranges table, tr and td nodes into a list of rows

    Arguments:
    doc -- document node searched for cells with a rowspan attribute
    structure -- sequence of nodes; it is iterated and also used for
                 membership tests, so it must not be a one-shot iterator

    Each table and tr node becomes a single-item list; consecutive td
    siblings found in structure are collected into one list.  Cells with a
    rowspan are appended to the rows they extend into.

    Returns the grid as a list of lists of nodes'''
    grid, temp, blist = [], [], []
    for node in structure:
        if isname(node, 'tr'):
            if len(temp):
                # Keep the pending cells next to the tr they belong to
                if temp[0].parentNode in grid[-1]:
                    if temp not in grid: grid.append(temp)
                    grid.append([node])
                else:
                    grid.append([node])
                    if temp not in grid: grid.append(temp)
            else: grid.append([node])
            temp = []
        elif isname(node, 'td'):
            if node not in blist:
                temp.append(node)
                blist.append(node)
            # Collect following td siblings until one is missing, is not a
            # td, was already collected or is not part of structure.  This
            # is a direct walk instead of the former eval()-built chain.
            sib = node.nextSibling
            for x in range(len(node.parentNode.childNodes)-1):
                if not sib or sib.localName != 'td': break
                if sib in blist or sib not in structure: break
                temp.append(sib)
                blist.append(sib)
                sib = sib.nextSibling
        elif isname(node, 'table'):
            # NOTE(review): temp is not reset here, so the same list can be
            # appended again by the final flush below -- confirm intent
            if len(temp):
                if temp not in grid: grid.append(temp)
            grid.append([node])
    if len(temp): grid.append(temp)
    for td in list(tagsbyAttr(doc, 'rowspan')):
        # Rows reached by the span.  The walk stops at the last sibling; the
        # eval() chain it replaces raised AttributeError at that point when
        # the rowspan asked for more rows than exist.
        trs, row = [], td.parentNode
        for x in range(int(attget(td, 'rowspan'))-1):
            row = row.nextSibling
            if row is None: break
            if row: trs.append(row)
        for nodelist in grid:
            for tr in trs:
                if tr.firstChild in nodelist: nodelist.append(td)
    return grid
示例#5
0
def cssifier(doc, **kwargs):
    '''Replaces presentational HTML attributes with CSS

    Arguments:
    doc -- document node, modified in place

    Keyword arguments:
    embed -- if the key is present (any value) the collected rules are
             written into a <style> element appended to the first <head>
    file -- path of a stylesheet to write; a <link> to it is appended to
            the first <head>

    Without either keyword each converted tag receives an inline style
    attribute.  With one of them, tags receive a class attribute and the
    document's existing style tags and stylesheet links are removed.

    Returns None'''

    def drop(attid):
        '''Marks an attribute for removal once, however often it is used'''
        # Deleting the same key repeatedly may fail on the attribute map, so
        # each id is recorded a single time
        if attid not in delete: delete.append(attid)

    def insertcss(attdict):
        '''Yields CSS declarations for the mappable attributes in attdict'''
        for attid in attdict:
            if attid[1] in cssmap:
                mapping = cssmap.get(attid[1])
                if mapping:
                    if isinstance(mapping, tuple):
                        if len(mapping) > 1:
                            for item in mapping:
                                if item.find('%s') != -1:
                                    # attdict replaces an undefined name
                                    # ('attlist') that raised NameError
                                    value = attdict.get(attid).value
                                    try:
                                        # Non-zero integers are pixel values
                                        if int(value):
                                            value = ''.join([value, 'px'])
                                    except ValueError: pass
                                    drop(attid)
                                    yield item % value
                                else:
                                    drop(attid)
                                    yield item
                        elif len(mapping) == 1:
                            # NOTE(review): mapping is a tuple here, so
                            # 'mapping % value' cannot format, and storing 0
                            # in css does not fit its {command: value}
                            # entries -- left unchanged, intent unclear
                            if mapping not in css:
                                value = attdict.get(attid).value
                                css[mapping % value] = 0
                                drop(attid)
                    elif isinstance(mapping, dict):
                        value = mapping.get(attdict.get(attid).value)
                        if value:
                            drop(attid)
                            yield value
                    elif mapping.find('%s') != -1:
                        value = attdict.get(attid).value
                        try:
                            if int(value): value = ''.join([value, 'px'])
                        except ValueError: pass
                        drop(attid)
                        yield mapping % value

    def recss(cdict):
        '''Serializes a {command: value} dictionary as "cmd: value; ..."'''
        return '; '.join(['%s: %s' % (i, j) for i, j in cdict.iteritems()])

    from htmldefs import html2css
    embed = None
    if 'embed' in kwargs: embed = 1
    cssfile = kwargs.get('file')
    # The element list must not be called 'tags': that name is the
    # module-level search function used below to find <head>
    css, elements, count = getcss(doc), list(tagsbyNS(htmlns, doc)), 1
    for tag in elements:
        if tag.localName in html2css:
            delete, cssmap = list(), html2css.get(tag.localName)
            tcss, scss = list(insertcss(tag.attributes)), dict()
            for attid in delete: del tag.attributes[attid]
            if len(tcss):
                for cmd, value in cssplit2('; '.join(tcss)): scss[cmd] = value
                if hasatt(tag, 'class'):
                    # A class without a known rule contributes nothing
                    classcss = css.get(''.join(['.', attget(tag, 'class')]))
                    if classcss: scss.update(classcss)
                    attdel(tag, 'class')
                if hasatt(tag, 'style'):
                    # Parse the inline style itself (was a builtin getattr()
                    # on the node followed by a lookup in css)
                    for cmd, value in cssplit2(attget(tag, 'style')):
                        scss[cmd] = value
                    attdel(tag, 'style')
                if scss not in css.values():
                    classvalue = ''.join(['css', str(count)])
                    css[''.join(['.', classvalue])] = scss
                    count += 1
                else:
                    # Reuse the name of an existing identical rule
                    for i, j in css.iteritems():
                        if j == scss: classvalue = i.strip('.')
                if embed or cssfile: attset(tag, 'class', classvalue)
                else: attset(tag, 'style', recss(scss))
    if embed or cssfile:
        cssgather = [('%s {%s}' % (i, recss(css.get(i)))) for i in css]
        cssgather.sort()
        cssmaster = '\n'.join(cssgather)
        # Materialize first: nodes are removed while looping
        for i in list(tagsinList(doc, ['style', 'link'])):
            if isname(i, 'link'):
                if hasatt(i, 'rel'):
                    if attget(i, 'rel') == 'stylesheet':
                        i.parentNode.removeChild(i)
            elif isname(i, 'style'): i.parentNode.removeChild(i)
        if embed:
            style = doc.createElementNS(htmlns, 'style')
            style.appendChild(doc.createComment(cssmaster))
        else:
            import urllib
            out = open(cssfile, 'wb')
            try:
                out.write(cssmaster)
            finally:
                out.close()
            style = doc.createElementNS(htmlns, 'link')
            attset(style, 'rel', 'stylesheet')
            attset(style, 'href', urllib.pathname2url(cssfile))
        attset(style, 'type', 'text/css')
        list(tags(doc, htmlns, 'head'))[0].appendChild(style)
示例#6
0
 def getwidth(node):
     '''Returns what attget reports for the node's width attribute'''
     width = attget(node, 'width')
     return width
示例#7
0
def fixwidths(grid):
    '''Converts the widths of the nodes in a grid to absolute values

    Arguments:
    grid -- list of lists of nodes (presumably as built by htmlgrid --
            confirm against callers); the nodes are modified in place

    The largest sum of non-percentage widths found in any row is written to
    grid[0][0].  In every following row a single node inherits its parent's
    width, while several nodes split the parent's width: percentage widths
    are converted to absolute numbers and nodes without a width share what
    is left.  A row that mixes percentage and absolute widths has its
    percentage cells moved into a nested table.

    Returns None'''

    def haswidth(node):
        return hasatt(node, 'width')

    def getwidth(node):
        return attget(node, 'width')

    def setwidth(node, width):
        attset(node, 'width', width)

    def regtest(nodelist):
        '''Returns True if any node has a non-percentage width'''
        for i in nodelist:
            if haswidth(i):
                if getwidth(i).find('%') == -1: return True

    def element(name):
        # 'node' is the enclosing function's current loop variable
        return node.ownerDocument.createElementNS(htmlns, name)

    maxwidths, mixed = [], None
    for nodelist in grid:
        count = 0
        for node in nodelist:
            if haswidth(node):
                width = getwidth(node)
                if width.find('%') == -1: count += int(width)
        # Recorded after counting so the last row is included as well
        maxwidths.append(count)
    if not len(maxwidths): return None
    maxwidth = str(max(maxwidths))
    setwidth(grid[0][0], maxwidth)
    for nodelist in grid[1:]:
        nowidths, percents, count = [], [], 0
        for node in nodelist:
            pwidth = getwidth(node.parentNode)
            if pwidth == '': pwidth = maxwidth
            if len(nodelist) > 1:
                if haswidth(node):
                    width = getwidth(node)
                    if width.find('%') == -1: count += int(width)
                    elif width.find('%') != -1: percents.append(node)
                else: nowidths.append(node)
            else: setwidth(node, pwidth)
        if len(nodelist) > 1:
            # Width left for percentage cells and cells without a width
            pwidth = int(pwidth)
            if count: pwidth -= count
            if len(nowidths): portion = pwidth // len(nowidths)
            if len(percents) > 1 and regtest(nodelist): mixed = True
            for node in percents:
                width = round((float(getwidth(node).strip('%'))*0.01)*pwidth)
                attset(node, 'width', str(int(width)))
            for node in nowidths: setwidth(node, str(portion))
            if mixed:
                tds = []
                for node in percents:
                    if node not in tds and isname(node, 'td'):
                        # Look at up to len(percents) following siblings.
                        # The walk stops at the last sibling; the eval()
                        # chain it replaces dereferenced None there.
                        sib = node
                        for x in range(len(percents)):
                            sib = sib.nextSibling
                            if sib is None: break
                            if sib in percents:
                                if node not in tds: tds.append(node)
                                tds.append(sib)
                        if len(tds):
                            # Wrap the cells in <td><table><tr>..</tr></table></td>
                            tcount, td = 0, element('td')
                            tds[0].parentNode.insertBefore(td, tds[0])
                            tr, table = element('tr'), element('table')
                            share, heights = dict(), list()
                            for t in tds:
                                # share keeps the attributes common to all
                                # cells seen so far
                                att, delete = dict(), dict()
                                if len(share):
                                    for a in t.attributes:
                                        attr = t.attributes.get(a)
                                        att[attr.name] = attr.value
                                else:
                                    for a in t.attributes:
                                        attr = t.attributes.get(a)
                                        share[attr.name] = attr.value
                                if len(att):
                                    for x, y in share.iteritems():
                                        if (x, y) not in att.iteritems():
                                            delete[x] = y
                                    for x in delete: del share[x]
                                tcount += int(getwidth(t))
                                tr.appendChild(t)
                            if len(share):
                                for n, v in share.iteritems(): attset(td, n, v)
                            # Set after the shared attributes so a shared
                            # width cannot replace the total
                            setwidth(td, str(tcount))
                            attset(table, 'border', '0')
                            attset(table, 'cellspacing', '0')
                            attset(table, 'cellpadding', '0')
                            setwidth(table, str(tcount))
                            td.appendChild(table)
                            table.appendChild(tr)
                            nodelist.append(td)
                            for x in nodelist:
                                if hasatt(x, 'height'):
                                    h = attget(x, 'height')
                                    if h.find('%') == -1:
                                        # Compared as numbers; as strings
                                        # '90' sorts above '100'.  Values
                                        # that are not integers are skipped.
                                        try: heights.append((int(h), h))
                                        except ValueError: pass
                            if len(heights):
                                tallest = max(heights)[1]
                                for x in nodelist:
                                    attset(x, 'height', tallest)
            mixed = None