Exemplo n.º 1
0
def makehtml(itemid,doc,abbyyfile,abbyyhtml,scanfile,metafile,olid,wrap=True):
    """Build an HTML document from an abbyy file and its scandata file.

    Args:
        itemid: Item identifier; used as the book id when ``olid`` is false,
            and always passed on to ``abbyyhtml.makehtmlbody``.
        doc: Passed through unchanged to ``abbyyhtml.makehtmlbody``.
        abbyyfile: Path of the abbyy file; opened here and handed to
            ``abbyyhtml.makehtmlbody`` as a stream.
        abbyyhtml: Object exposing ``makehtmlbody(...)``.
            NOTE(review): this parameter shadows any module of the same
            name -- confirm that is intended.
        scanfile: Path of the scandata file; parsed with ``parse_scandata``.
        metafile: Passed to ``getmetaxml``; the result goes to
            ``bighead.bighead``.
        olid: When truthy it is preferred over ``itemid`` as the book id;
            also passed to ``bighead.bighead``.
        wrap: When true (the default) the paragraphs are wrapped in a
            complete document (prolog, ``<head>`` with generated CSS,
            ``<body>``). When false only the paragraphs are returned,
            each preceded by a newline.

    Returns:
        The generated markup as a single string.

    Raises:
        KeyError: if ``wrap`` is true and ``makehtmlbody`` did not store a
            ``'%histogram'`` entry in the class map.
    """
    classmap={}
    # Context managers guarantee both files are closed, even on error.
    with open(scanfile) as scanstream:
        scaninfo=parse_scandata(scanstream)
    metadata=getmetaxml(metafile)
    bookid=olid or itemid
    with open(abbyyfile) as abbyystream:
        # Materialise the paragraphs while the stream is still open, in
        # case makehtmlbody reads it lazily.
        pars=list(abbyyhtml.makehtmlbody(abbyystream,bookid,itemid,doc,
                                         classmap=classmap,
                                         scaninfo=scaninfo))
    pieces=[]
    if wrap:
        # An XML declaration is only legal once, at the very start.
        pieces.append(u"<?xml version='1.0' encoding='utf-8' ?>\n<!DOCTYPE html>\n")
        pieces.append(u"<html>\n<head>\n")
        classhist=classmap['%histogram']
        # Keys starting with '%' are bookkeeping entries, not style text.
        rules=[".%s { %s } /* used %d times */\n"%
               (classmap[x],x,classhist[classmap[x]])
               for x in classmap if not x.startswith('%')]
        style=abbyy_css+"".join(rules)
        pieces.append(bighead.bighead(metadata,olid,style))
        pieces.append(u"\n</head>\n<body class='abbyytext'>")
    for par in pars:
        pieces.append(u"\n")
        pieces.append(par)
    if wrap:
        pieces.append(u"\n</body>\n</html>\n")
    return u"".join(pieces)
Exemplo n.º 2
0
def makehtml(bookid,doc=False,path=False,
             abbyyref=False,scanref=False,
             wrap=True,mergepages=True):
    """Build HTML for a book (or one document of it) from its abbyy data.

    Args:
        bookid: Book identifier; also the id prefix when no OLID is found.
        doc: Document identifier; defaults to ``bookid`` when false.
        path: Passed to ``find_scandata`` / ``find_abbyy`` when the
            corresponding explicit reference is not given.
        abbyyref: Optional reference opened with ``tryopen`` to get the
            abbyy stream; otherwise ``find_abbyy`` is used.
        scanref: Optional reference opened with ``tryopen`` to get the
            scandata stream; otherwise ``find_scandata`` is used.
        wrap: When true the paragraphs are wrapped in a complete document
            (prolog, ``<head>`` with generated CSS, ``<body>``); when false
            only the paragraphs are returned, each preceded by a newline.
        mergepages: Accepted for interface compatibility; not used in this
            function body.

    Returns:
        The generated markup as a single string.

    Raises:
        Exception: if the abbyy stream cannot be opened or found.
        KeyError: if ``wrap`` is true and ``makehtmlbody`` did not store a
            ``'%histogram'`` entry in the class map.
    """
    if not doc: doc=bookid
    classmap={}
    olid=False
    # Compare by value: two equal id strings need not be the same object.
    if doc==bookid:
        olib=olib_lookup(bookid)
        if olib: olid=os.path.basename(olib['key'])
    idprefix=olid or bookid
    if scanref: scanstream=tryopen(scanref)
    else: scanstream=find_scandata(bookid,doc,path)
    # Scan information is optional; False is passed on when unavailable.
    scaninfo=parse_scandata(scanstream) if scanstream else False
    # NOTE(review): neither stream is closed here; tryopen/find_* are not
    # visible, so ownership of the streams should be confirmed.
    if abbyyref:
        abbyystream=tryopen(abbyyref)
        if not abbyystream:
            raise Exception("Can't open abbyy file '%s'"%abbyyref)
    else:
        abbyystream=find_abbyy(bookid,doc,path)
        if not abbyystream:
            raise Exception("Can't find abbyy file for '%s/%s' under '%s'"%
                        (bookid,doc,path))
    pars=abbyyhtml.makehtmlbody(abbyystream,idprefix,bookid,doc,
                                classmap=classmap,
                                scaninfo=scaninfo)
    pieces=[]
    if wrap:
        # An XML declaration is only legal once, at the very start.
        pieces.append("<?xml version='1.0' encoding='utf-8' ?>\n<!DOCTYPE html>\n")
        pieces.append("<html>\n<head>\n")
        classhist=classmap['%histogram']
        # Keys starting with '%' are bookkeeping entries, not style text.
        rules=[".%s { %s } /* used %d times */\n"%
               (classmap[x],x,classhist[classmap[x]])
               for x in classmap if not x.startswith('%')]
        style=abbyy_css+"".join(rules)
        pieces.append(bighead.bighead(olid,style).encode("utf-8"))
        pieces.append("\n</head>\n<body class='abbyytext'>")
    for par in pars:
        pieces.append("\n")
        pieces.append(par)
    if wrap:
        pieces.append("\n</body>\n</html>\n")
    return "".join(pieces)
Exemplo n.º 3
0
def remakehtml(olid,body,style):
    """Wrap an already generated body in a complete HTML document.

    Args:
        olid: Passed to ``bighead.bighead`` to build the head content.
        body: Markup placed between the ``<body>`` tags unchanged.
        style: CSS text passed to ``bighead.bighead``.

    Returns:
        The full document: prolog, ``<head>`` (the UTF-8 encoded result of
        ``bighead.bighead``), ``<body class='abbyytext'>`` containing
        ``body``, and the closing ``</html>``.
    """
    head=bighead.bighead(olid,style).encode("utf-8")
    return "".join([
        # An XML declaration is only legal once, at the very start.
        "<?xml version='1.0' encoding='utf-8' ?>\n<!DOCTYPE html>\n",
        "<html>\n<head>\n",
        head,
        "\n</head>\n<body class='abbyytext'>",
        body,
        # Close <html> as well so the document is well-formed.
        "</body>\n</html>\n",
    ])