def getGoogleFont(name):
    """Download a Google web font and store it as a local TrueType file.

    The CSS stylesheet for ``name`` is fetched from the Google Fonts API; the
    first ``url(...)`` reference found in it is downloaded and written to
    ``images/font-<name>.ttf``, with any ``:`` in the name replaced by ``-``
    (e.g. ``Tangerine:bold`` becomes ``Tangerine-bold``).

    Args:
        name: Font family as used in the Google Fonts ``family=`` query.

    Returns:
        The path of the local font file that was written.

    Calls fatal() if the stylesheet or the font file cannot be fetched, or if
    the stylesheet contains no ``url(...)`` reference.
    """
    cssurl = "http://fonts.googleapis.com/css?family=" + name
    try:
        with urllib.request.urlopen(cssurl) as r:
            contents = r.read().decode('utf-8')
    except urllib.error.URLError as e:
        # e.reason may be an exception object rather than a string, so it
        # has to be converted before being concatenated into the message.
        fatal("Cannot find google font " + name + ": " + str(e.reason))
    dprint(1, contents)

    # The stylesheet is expected to hold an @font-face rule whose src lists
    # something like: url(http://fonts.gstatic.com/.../xxx.ttf) format('truetype');
    m = re.search(r"url\((.*?)\)[ ;]", contents)
    if not m:
        fatal("Bad font file " + cssurl + " from google: " + contents)
    url = m.group(1)
    dprint(1, "Remote ttf: " + url)

    try:
        with urllib.request.urlopen(url) as r:
            ttf = r.read()
    except urllib.error.URLError as e:
        fatal("Cannot download google font " + name + " from " + url + ": " + str(e.reason))

    # Turn something like Tangerine:bold into Tangerine-bold
    basename = name.replace(":", "-")
    localFile = "images/font-" + basename + ".ttf"
    # The with-statement closes the file; no explicit close() is needed.
    with open(localFile, "wb") as f:
        f.write(ttf)

    dprint(1, "Brought google font into " + localFile)
    return localFile
def defineTemplate(self, opts, block):
    """Register a template described by a ``<template ...>`` tag.

    Args:
        opts: Attribute text of the tag; must supply ``name`` and ``type``.
        block: The lines making up the template body.

    Returns:
        An empty list. NOTE(review): presumably this removes the definition
        from the surrounding text when used as a tag-block callback; confirm
        against the caller.

    Calls fatal() when either required attribute is missing.
    """
    attributes = parseTagAttributes("template", opts, [ "name", "type" ])
    if "name" not in attributes or "type" not in attributes:
        fatal("Template definition requires both name and type attributes: " + opts)
    name = attributes["name"]
    # Named templateType so the builtin type() is not shadowed.
    templateType = attributes["type"]
    dprint(1, "defining template name " + name + " of type " + templateType + ": " + str(block))
    self.add(templateType, name, block)
    return []
def isOpt(self, k, default):
    """Return the boolean value of option ``k``.

    ``default`` is returned when the option is not present in ``self.opt``.
    The stored value must be the string "true" or "false"; anything else is
    reported through fatal().
    """
    if k not in self.opt:
        return default

    value = self.opt[k]
    if value == "true":
        return True
    if value == "false":
        return False

    fatal("Option " + k + ": must be true or false, not " + self.opt[k])
    return default
def expandMacro(self, opts):
    """Expand the macro template named by an ``<expand-macro ...>`` tag.

    Args:
        opts: Attribute text of the tag. ``name`` is required; ``vars`` is an
            optional option string parsed into the expansion keys.

    Returns:
        Whatever the macro template's ``expand()`` returns for those keys.

    Calls fatal() when no macro name is given.
    """
    attributes = parseTagAttributes("expand-macro", opts, [ "name", "vars" ])
    if "name" not in attributes:
        # Report the tag's attribute text; no source line is available here.
        fatal("expand-macro: No macro name given: " + opts)
    template = self.get([ "macro" ], attributes["name"])
    if "vars" in attributes:
        keys = parseOption("expand-macro", attributes["vars"])
    else:
        keys = {}
    return template.expand(keys)
def getOptEnum(self, k, map, default):
    """Translate the value of option ``k`` through the lookup table ``map``.

    An empty option value yields ``default``; a value that is a key of
    ``map`` yields the mapped entry. Any other value is reported through
    fatal(), after which None is returned.
    """
    tagValue = self.getopt(k)
    if tagValue == '':
        return default
    if tagValue in map:
        return map[tagValue]

    legalValues = ', '.join(map)
    fatal("Option " + k + ": Illegal value '" + tagValue + "'. Legal values are: " + legalValues)
    return None
def parseOption(tag, arg, legalOptions = None):
    """Parse an option string and optionally validate the option names.

    Args:
        tag: Name of the tag being parsed; used as the error-message prefix.
        arg: The raw option text, handed to parseOption1().
        legalOptions: Optional collection of permitted option names. When
            None, no validation is performed.

    Returns:
        The mapping of options produced by parseOption1().

    Any exception raised while parsing, and any option not listed in
    ``legalOptions``, is reported through fatal() prefixed with ``tag``.
    """
    try:
        options = parseOption1(arg)

        if legalOptions is not None:
            for option in options:
                if option not in legalOptions:
                    raise Exception("Option " + option + ": Unknown option in " + arg)
    except Exception as e:
        fatal(tag + ": " + str(e))
    return options
def parseTagAttributes(tag, arg, legalAttributes = None):
    """Parse the attribute text of a tag and optionally validate the keywords.

    Args:
        tag: Name of the tag being parsed; used as the error-message prefix.
        arg: The raw attribute text, handed to parseTagAttributes1().
        legalAttributes: Optional collection of permitted attribute names.
            When None, no validation is performed.

    Returns:
        The mapping of attributes produced by parseTagAttributes1().

    Any exception raised while parsing, and any attribute not listed in
    ``legalAttributes``, is reported through fatal() prefixed with ``tag``.
    """
    try:  # TODO: Move this up
        attributes = parseTagAttributes1(arg)

        if legalAttributes is not None:
            for attribute in attributes:
                if attribute not in legalAttributes:
                    raise Exception("Keyword " + attribute + ": Unknown keyword in " + arg)
    except Exception as e:
        fatal(tag + ": " + str(e))
    return attributes
def footnoteRelocate(opts, block): opts = opts.strip() # Parse and recreate the footnote tag, to handle autonumbering footnotes args = parseTagAttributes("footnote", opts, [ "id" ]) if not "id" in args: fatal("<footnote> does not have id attribute: " + opts) id = args["id"] target = None if id == '#': nonlocal footnotec id = str(footnotec) if mode == asterisk and footnotec <= len(footnoteMarkers): displayid = footnoteMarkers[footnotec-1] else: displayid = "[" + id + "]" footnotec += 1 else: if id in footnoteMarkers: i = footnoteMarkers.index(id) displayid = id # Don't add the square brackets target = footnoteMarkersText[i] if not reset: fatal("Use of explicit footnote symbols requires footnote-location to be set to either asterisk or heading-reset: " + str(opts)) else: displayid = "[" + id + "]" if target == None: target = id if reset: target += "_" + str(footnoteChapter) opts = "id='" + displayid + "' target='" + target + "'" # Handle fn tags inside footnotes! relocateFootnotes(block) # Recreate the block block.insert(0, "<footnote " + opts + ">") block.append("</footnote>") if emitAtReference: noteMap[target] = block return [] # If we aren't supposed to move footnotes, do nothing if mode == none: return block # Otherwise accumulate them for emitting elsewhere nonlocal notes notes.append(block) # Clear the current location of the footnote return []
def FNtoHtml(wb):
    """Rewrite ``<fn id=... target=...>`` reference tags in ``wb`` as HTML.

    ``wb`` is a list of lines and is modified in place. Each reference is
    replaced by a superscript mark. When ``noteMap`` holds a footnote block
    for the reference's target, that block is inserted immediately before the
    line holding the reference (and removed from ``noteMap``) and the mark is
    not linked; otherwise the mark becomes a link to ``#f<target>``, and the
    first reference for a given id also receives an ``r<target>`` anchor.

    Calls fatal() when a reference lacks its ``id`` or ``target`` attribute.
    """
    matchFN = re.compile(r"<fn\s+(.*?)/?>")
    matchPara = re.compile(r"(<p.*?>)")
    footnotes = {}

    # footnote marks in text
    i = 0
    while i < len(wb):
        off = 0
        line = wb[i]
        block = []
        while True:
            m = matchFN.search(line, off)
            if not m:
                break
            opts = m.group(1)
            args = parseTagAttributes("fn", opts, [ "id", "target" ])
            # Report a missing id clearly rather than failing with a KeyError
            if "id" not in args:
                fatal("Missing id in fn: " + line)
            fmid = args["id"]
            if "target" not in args:
                fatal("Missing internal target in fn: " + line)
            target = args["target"]
            dprint(1, "id: " + fmid + ", target: " + target)
            repl = "<sup><span style='font-size:0.9em'>" + fmid + "</span></sup>"
            if target in noteMap:
                # Note no link when we are co-locating the reference with the footnote
                block.extend(noteMap[target])
                del noteMap[target]
            elif fmid in footnotes and footnotes[fmid] == target:
                wprint('multifootnote', "warning: footnote id <fn id='" + fmid + "'> occurs multiple times.  <footnote> link will be to the first. Line: >>>" + line + "<<<")
                repl = "<a href='#f{0}' style='text-decoration:none'>{1}</a>".format(target, repl)
            else:
                footnotes[fmid] = target
                repl = "<a id='r{0}'/><a href='#f{0}' style='text-decoration:none'>{1}</a>".format(target, repl)
            l = line[0:m.start(0)] + repl
            off = len(l)    # Next search starts after the replacement
            line = l + line[m.end(0):]
        wb[i] = line

        # Emit the footnote, right before the line with the footnote reference
        # If the line with the footnote reference is a paragraph start, need
        # to emit the footnote between the paragraph tag, and the rest of the
        # text on the first line of the paragraph
        if len(block) > 0:
            m = matchPara.match(wb[i])
            if m:
                block.insert(0, m.group(1))
                wb[i] = wb[i][len(m.group(1)):]
            wb[i:i] = block
            # Skip over the lines just inserted
            i += len(block)
        i += 1
def footnotesToHtml(wb):
    """Convert footnote references and footnote blocks in ``wb`` to HTML.

    ``wb`` is a list of lines and is modified in place, in two passes:

    1. Every ``<fn id=... target=...>`` reference becomes a superscript link
       to ``#f<target>``; the first reference for a given id also receives an
       ``r<target>`` anchor.
    2. Every line starting with ``<footnote id=... target=...>`` becomes a
       ``<div id='f<target>'>`` holding a link back to ``#r<target>``, and
       the following ``</footnote>`` line becomes the closing ``</div>``.

    Calls fatal() when a reference lacks ``id`` or ``target``, or when a
    footnote block is never closed.
    """
    matchFN = re.compile(r"<fn\s+(.*?)/?>")
    matchFootnote = re.compile(r"<footnote\s+(.*?)>")
    matchFootnoteEnd = re.compile(r"</footnote>")
    footnotes = {}

    # footnote marks in text
    i = 0
    while i < len(wb):
        off = 0
        line = wb[i]
        while True:
            m = matchFN.search(line, off)
            if not m:
                break
            opts = m.group(1)
            args = parseTagAttributes("fn", opts, [ "id", "target" ])
            # Report a missing id clearly rather than failing with a KeyError
            if "id" not in args:
                fatal("Missing id in fn: " + line)
            fmid = args["id"]
            if "target" not in args:
                fatal("Missing internal target in fn: " + line)
            target = args["target"]
            dprint(1, "id: " + fmid + ", target: " + target)
            if fmid in footnotes and footnotes[fmid] == target:
                cprint("warning: footnote id <fn id='" + fmid + "'> occurs multiple times.  <footnote> link will be to the first.")
                repl = "<a href='#f{0}' style='text-decoration:none'><sup><span style='font-size:0.9em'>{1}</span></sup></a>".format(target, fmid)
            else:
                footnotes[fmid] = target
                repl = "<a id='r{0}'/><a href='#f{0}' style='text-decoration:none'><sup><span style='font-size:0.9em'>{1}</span></sup></a>".format(target, fmid)
            l = line[0:m.start(0)] + repl
            off = len(l)    # Next search starts after the replacement
            line = l + line[m.end(0):]
        wb[i] = line
        i += 1

    # footnote targets and text
    i = 0
    while i < len(wb):
        m = matchFootnote.match(wb[i])
        if m:
            opts = m.group(1)
            openLine = wb[i]
            args = parseTagAttributes("footnote", opts, [ "id", "target" ])
            fnid = args["id"]
            target = args["target"]
            wb[i] = "<div id='f{0}'><a href='#r{0}'>{1}</a></div>".format(target, fnid)
            # Find the closing tag without running off the end of the list
            while i < len(wb) and not matchFootnoteEnd.match(wb[i]):
                i += 1
            if i == len(wb):
                fatal("No closing </footnote> found for: " + openLine)
            wb[i] = "</div> <!-- footnote end -->"
        i += 1
def parseStandaloneTagBlock(lines, tag, function, allowClose = False, lineFunction = None):
    """Replace every standalone ``<tag ...> ... </tag>`` block in ``lines``.

    A block starts on a line beginning with ``<tag`` and ends at the next
    line beginning with ``</tag>``. The lines in between are passed to
    ``function`` and the whole block, including both tag lines, is replaced
    by the list it returns. ``lines`` is modified in place and also returned.

    Args:
        lines: List of text lines to scan.
        tag: Tag name, without angle brackets.
        function: ``function(openArgs, block)`` returning the replacement
            lines, where ``openArgs`` is the text after the tag name in the
            open tag and ``block`` is the list of lines inside the block.
        allowClose: When true, a self-closed open tag (``<tag .../>``) is
            accepted and ``function`` receives an empty block.
        lineFunction: Optional ``lineFunction(index, line)`` called on every
            line visited; the line is replaced by the list it returns, which
            may be empty to drop the line.

    Calls fatal() for a self-closed tag when ``allowClose`` is false, for an
    open tag nested inside another, and for a missing closing tag.
    """
    startTag = "<" + tag
    endTag = "</" + tag + ">"
    regex = re.compile(startTag + "(.*?)(/)?>")
    i = 0
    while i < len(lines):
        if lineFunction is not None:
            insertion = lineFunction(i, lines[i])
            lines[i:i+1] = insertion
            if not insertion:
                # The line was removed, so index i already names the next
                # unvisited line. Stepping back (as len(insertion)-1 would)
                # re-examines an earlier line, or indexes past the start.
                continue
            # Continue with the last inserted line
            i += len(insertion) - 1

        m = regex.match(lines[i])
        if not m:
            i += 1
            continue

        openLine = lines[i]
        openArgs = m.group(1)
        block = []

        close = m.group(2)
        if close is not None:
            if not allowClose:
                fatal("Open tag " + tag + " marked for close. " + openLine)
            # Self-closed tag: the block is just the open line itself
            j = i
        else:
            j = i+1
            while j < len(lines):
                line = lines[j]
                if line.startswith(endTag):
                    break
                if line.startswith(startTag):
                    fatal("No closing tag found for " + tag + "; open line: " + openLine + "; found another open tag: " + line)
                block.append(line)
                j += 1

            if j == len(lines):
                fatal("No closing tag found for " + tag + "; open line: " + openLine)

        replacement = function(openArgs, block)

        lines[i:j+1] = replacement
        # Resume after the replacement; it is not re-scanned
        i += len(replacement)

    return lines
def parseEmbeddedSingleLineTagWithContent(line, tag, function):
    """Expand every ``<tag args>content</tag>`` occurrence within one line.

    For each occurrence ``function(args, content, originalLine)`` is called
    and the text it returns replaces the whole element. Scanning resumes
    after the replacement, so replacement text is not expanded again.
    Text that merely begins like the tag (``<tagxyz``, or an open tag with no
    ``>``) is skipped. Returns the rewritten line.

    Calls fatal() for a closing tag with no open tag, a self-closed open tag,
    or an open tag with no closing tag on the line.
    """
    original = line
    openMarker = "<" + tag
    closeMarker = "</" + tag + ">"
    searchFrom = 0
    while True:
        tagStart = line.find(openMarker, searchFrom)
        if tagStart < 0:
            if closeMarker in line:
                fatal("Found closing tag " + closeMarker + " without open tag: " + line)
            return line

        argStart = tagStart + len(openMarker)
        # The tag name must be followed directly by '>' or a space
        if line[argStart:argStart + 1] not in ('>', ' '):
            # Nope, not really this tag
            searchFrom = argStart
            continue

        # Look for the end of the open tag; a NUL character or the end of
        # the line means the tag was never closed with '>'
        argEnd = argStart
        while argEnd < len(line) and line[argEnd] not in ('>', '\0'):
            argEnd += 1
        if argEnd == len(line) or line[argEnd] == '\0':
            # Again, not really this tag? <tag xxx without closing greater...
            searchFrom = argEnd
            continue

        if line[argEnd - 1] == '/':
            fatal("Open tag " + tag + " is marked for close; this tag requires an open and close tag, with text between: " + line)

        arg = line[argStart:argEnd].strip()
        contentStart = argEnd + 1
        closeStart = line.find(closeMarker, contentStart)
        if closeStart < 0:
            fatal("Open tag " + tag + " found, no closing tag: " + line)

        content = line[contentStart:closeStart]
        repl = function(arg, content, original)

        head = line[:tagStart]
        tail = line[closeStart + len(closeMarker):]
        line = head + repl + tail
        searchFrom = len(head) + len(repl)
def main():
    """Command-line entry point.

    Parses the command line, optionally runs the unit tests, validates the
    requested output formats, determines the source file (``-i``, a single
    positional argument, or the only ``*-src.txt`` file in the current
    directory) and hands the options plus the file's base name to
    processFile().
    """
    # process command line
    parser = OptionParser()
    parser.add_option("-i", "--infile",
        dest="infile", default="",
        help="input file")
    parser.add_option("-f", "--format",
        dest="formats", default="th",
        help="format=thkep (text,HTML,Kindle,Epub,PDF)")
    parser.add_option("-d", "--debug",
        dest="debug", default="0",
        help="set debug mode level")
    parser.add_option("", "--save",
        action="store_true", dest="saveint", default=False,
        help="save intermediate file")
    parser.add_option("--unittest",
        action="store_true", dest="unittest", default=False, help="run unittests")
    parser.add_option("", "--ebookid",
        dest="ebookid", default="",
        help="Create fadedpage zip file")
    (options, args) = parser.parse_args()

    print("fpgen {}".format(config.VERSION))

    if options.unittest:
        # Hide our own options from unittest
        sys.argv = sys.argv[:1]
        loader = unittest.TestLoader()
        tests = []
        from testtable import TestParseTableColumn, TestMakeTable, TestTableCellFormat
        from parse import TestParseTagAttributes, TestParsing
        from drama import TestDrama, TestOneDramaBlockMethod
        from testtext import TestTextInline, TestTextRewrap
        from footnote import TestFootnote
        from template import TestTemplate
        from testhtml import TestHTMLPara
        for cl in [
            TestParseTableColumn, TestMakeTable, TestDrama, TestParsing,
            TestParseTagAttributes, TestOneDramaBlockMethod, TestTextRewrap,
            TestTextInline, TestTableCellFormat, TestTemplate, TestFootnote,
            TestHTMLPara
        ]:
            tests.append(loader.loadTestsFromTestCase(cl))
        tests = loader.suiteClass(tests)
        result = unittest.TextTestRunner(verbosity=2).run(tests)
        # Report test failures through the exit status
        sys.exit(0 if result.wasSuccessful() else 1)

    if options.ebookid != "":
        # An ebook id implies generating every format
        options.formats = "thkep"
        if not re.match(r"^20[01]\d[01]\d[0-9a-zA-Z][0-9a-zA-Z]$", options.ebookid):
            fatal("Ebookid doesn't look correct: " + options.ebookid)

    # Anything left after removing the known format letters is unsupported
    tmp = options.formats
    tmp = re.sub('a|h|t|k|e|p', '', tmp)
    if not tmp == '':
        fatal("format option {} not supported".format(tmp))

    # 'a' format is 'all'
    if options.formats == 'a':
        options.formats = "htpek"

    # Can either use -i file, or just file.
    if len(args) > 1:
        fatal("Too many positional options")

    if len(args) == 1:
        if options.infile == '':
            options.infile = args[0]
        else:
            fatal("Positional argument is incompatible with the file option -i/--infile")

    # Nothing specified? See if exactly one file matching *-src.txt in current dir
    if options.infile == '':
        for file in os.listdir('.'):
            if fnmatch.fnmatch(file, '*-src.txt'):
                if options.infile != '':
                    fatal("Input file not specified; multiple found in the current directory.")
                options.infile = file
        if options.infile == '':
            fatal("Missing source file option -i/--infile")

    # check input filename: it must really END in "-src.txt", so the pattern
    # is anchored and the dot is matched literally
    m = re.match(r'(.*?)-src\.txt$', options.infile)
    if not m:
        print("source filename must end in \"-src.txt\".")
        print("example: midnight-src.txt will generate midnight.html, midnight.txt")
        sys.exit(1)
    basename = m.group(1)

    try:
        processFile(options, basename)
    except FileNotFoundError:
        fatal(options.infile + ": File not found")
def parseLineEntry(tag, line):
    """Split a one-line ``<tag args>content</tag>`` element.

    The element must span the entire line.

    Args:
        tag: Tag name, without angle brackets.
        line: The line to parse.

    Returns:
        A tuple ``(args, content)``: the attribute text of the open tag
        (leading whitespace removed) and the text between the tags.

    Calls fatal() when the line does not have that shape.
    """
    # Raw strings keep \s a regex escape rather than an invalid string escape
    pattern = r"^<" + tag + r"\s*(.*?)>(.*)</" + tag + r">$"
    m = re.match(pattern, line)
    if not m:
        fatal("Incorrect line: " + line)
    return m.group(1), m.group(2)
def parseEmbeddedTagBlock(lines, tag, function):
    """Replace ``<tag ...> ... </tag>`` elements that may start mid-line.

    Unlike a standalone block, the open tag may be preceded by text on its
    line and the close tag may be followed by text, and the element may span
    several lines. For each element, ``function(openArgs, block)`` is called
    with the text after the tag name in the open tag and the list of content
    lines; the list it returns replaces the element, with the text before the
    open tag and after the close tag re-attached as laid out in the comment
    tables below. ``lines`` is modified in place and also returned.

    The index is deliberately not advanced after a replacement, so the same
    line is searched again; a replacement that contains the open tag is
    therefore expanded again as well.

    Calls fatal() when no closing tag is found.
    """
    i = 0
    startTag = "<" + tag
    endTag = "</" + tag + ">"
    regex = re.compile(startTag + "(.*?)>")
    regexEnd = re.compile(endTag)
    while i < len(lines):
        m = regex.search(lines[i])
        if not m:
            i += 1
            continue

        openLine = lines[i]
        openArgs = m.group(1)
        block = []

        # Text before the open tag, and the rest of the line after it
        startLineStart = openLine[:m.start(0)]
        startLineTrailer = openLine[m.end(0):]
        line = startLineTrailer
        startLineNumber = i

        # Collect content until the close tag; j is the line being examined
        j = i
        while True:
            m = regexEnd.search(line)
            if m:
                endLineStart = line[:m.start(0)]
                endLineTrailer = line[m.end(0):]
                if endLineStart != "":
                    block.append(endLineStart)
                endLineNumber = j
                # An element spread over lines with no content at all is
                # handed over as one empty line
                if len(block) == 0 and startLineNumber != endLineNumber:
                    block.append("")
                break

            if j == startLineNumber:
                # On the open line only the text after the tag is content
                if startLineTrailer != "":
                    block.append(startLineTrailer)
            else:
                block.append(line)
            j += 1
            if j == len(lines):
                fatal("No closing tag found for " + tag + "; open line: " + openLine)
            line = lines[j]

        replacement = function(openArgs, block)

        # put startLine at the beginning of the first line,
        # put endLineTrailer at the end of the last line.
        n = len(replacement)
        if n == 0:
            if startLineNumber == endLineNumber:
                # <tag>xxx</tag> => ""
                # b4<tag>xxx</tag> => b4
                # b4<tag>xxx</tag>after => b4after
                # <tag>xxx</tag>after => after
                replacement.append(startLineStart + endLineTrailer)
            else:
                # <tag>x\nx</tag> => "", ""
                # b4<tag>x\nx</tag> => b4, ""
                # b4<tag>x\nx</tag>after => b4, after
                # <tag>x\nx</tag>after => "", after
                replacement.append(startLineStart)
                replacement.append(endLineTrailer)
        else:
            # <tag></tag>; repl=R => block=[], R
            # <tag></tag>after; repl=R => block=[], Rafter
            # b4<tag></tag>; repl=R => block=[], b4R
            # b4<tag></tag>after; repl=R => block=[], b4Rafter
            # <tag>\n</tag>; repl=R => block=[""], R
            # <tag>\n</tag>after; repl=R => block=[""], R\nafter
            # b4<tag>\n</tag>; repl=R => block=[""], b4\nR
            # b4<tag>\n</tag>after; repl=R => block=[""], b4\nR\nafter
            # b4<tag>x\n</tag>after; repl=R => block=["x"], b4\nR\nafter
            # b4<tag>x\ny</tag>after; repl=R => block=["x","y"], b4\nR\nafter
            if startLineNumber == endLineNumber:
                # On the same line, prefix & suffix on same line
                replacement[0] = startLineStart + replacement[0]
                replacement[-1] = replacement[-1] + endLineTrailer
            else:
                # On different lines, prefix & suffix on different lines
                if startLineStart != "":
                    replacement.insert(0, startLineStart)
                if endLineTrailer != "":
                    replacement.append(endLineTrailer)

        # Lines i..j (open line through close line) are replaced
        lines[i:j+1] = replacement

        # Keep going, in case there are more tags on the same line.
        # Danger! One consequence is recursion, we will expand an
        # expanded tag.  Would be nice to keep going, but only on the
        # end of the current line...
        #i += len(replacement)

    return lines
def chapterTemplates(self, lines, properties, meta):
    """Expand chapter headings and macro invocations in ``lines``, in place.

    * A line starting with ``<chap-head`` (optionally followed, after blank
      lines, by a ``<sub-head`` line) is replaced by the expansion of a
      chapter template. The first heading uses the template named by the
      ``template-chapter-first`` property (default ``default-first``); later
      headings use ``template-chapter`` (default ``default``). The expansion
      keys are ``chap-head``, ``sub-head`` (when present) and any ``vars``
      given on the ``<chap-head>`` tag.
    * The first ``<expand-macro name=... vars=...>`` on a line is replaced by
      the expansion of that macro template, keeping the text before and
      after the tag. Further macros on the same line, and macros inside the
      expansion, are not processed.

    ``self.setGlobals(meta)`` is called before any template is fetched.

    Calls fatal() for a ``<chap-head>`` with nothing but blank lines after
    it, a ``<sub-head>`` that does not follow a ``<chap-head>``, or an
    ``<expand-macro>`` without a name.
    """
    # Figure out what template name to use.
    if "template-chapter" in properties:
        chapterTemplateName = properties["template-chapter"]
    else:
        chapterTemplateName = "default"
    if "template-chapter-first" in properties:
        chapterTemplateNameFirst = properties["template-chapter-first"]
    else:
        chapterTemplateNameFirst = "default-first"
    dprint(1, "Chapter Template: Using first: " + chapterTemplateNameFirst + \
        ", subsequent: " + chapterTemplateName)

    # Now we can set the globals, since we have now extracted all the metadata
    self.setGlobals(meta)

    # Figure out which templates we are going to use.
    tFirst = self.get([ "chapter" ], chapterTemplateNameFirst)
    t = self.get([ "chapter" ], chapterTemplateName)

    regexMacro = re.compile(r"<expand-macro\s+(.*?)/?>")
    regexBlank = re.compile(r"^\s*$")
    i = 0
    first = True
    while i < len(lines):
        line = lines[i]
        if line.startswith("<chap-head"):
            keys = {}
            opts, keys["chap-head"] = parseLineEntry("chap-head", line)

            # Skip blank lines after the heading. The pattern comes first in
            # re.match(pattern, string); the line must not be the pattern.
            j = i+1
            while j < len(lines) and regexBlank.match(lines[j]):
                j += 1
            if j == len(lines):
                fatal("End of file after <chap-head>")

            if opts != "":
                attributes = parseTagAttributes("chap-head", opts, [ "vars" ])
                dprint(1, "<chap-head> attributes: " + str(attributes))
                if "vars" in attributes:
                    headVars = parseOption("chap-head", attributes["vars"])
                    dprint(1, "<chap-head> vars: " + str(headVars))
                    keys.update(headVars)

            line = lines[j]
            if line.startswith("<sub-head"):
                opts, keys["sub-head"] = parseLineEntry("sub-head", line)
            else:
                # Do not eat this line!
                j -= 1

            # If the first we've seen, it starts the book
            if first:
                templ = tFirst
                first = False
            else:
                templ = t

            dprint(1, "expand keys: " + str(keys))
            replacement = templ.expand(keys)
            dprint(2, "replace " + str(lines[i:j+1]) + " with " + str(replacement))
            lines[i:j+1] = replacement
            i += len(replacement)
            continue

        if line.startswith("<sub-head>"):
            fatal("Found <sub-head> not after a <chap-head>: " + line)

        # What about multiple macro expansions on a line?  Or recursion?
        # Make it simpler for now by just punting: if you expand, then we move on
        # to the next line.
        m = regexMacro.search(line)
        if m:
            opts = m.group(1)
            attributes = parseTagAttributes("expand-macro", opts, [ "name", "vars" ])
            if "name" not in attributes:
                fatal("expand-macro: No macro name given: " + line)
            template = self.get([ "macro" ], attributes["name"])
            if "vars" in attributes:
                keys = parseOption("expand-macro", attributes["vars"])
            else:
                keys = {}
            replacement = template.expand(keys)
            prefix = line[:m.start(0)]
            suffix = line[m.end(0):]

            if len(replacement) == 0:
                # If the template returns nothing, then you end up with a single line of
                # the prefix and suffix around the <expand-macro>
                replacement = [ prefix + suffix ]
            else:
                # Otherwise the prefix goes on the first line; and the suffix at the end of
                # the last; which might be the same single line.
                replacement[0] = prefix + replacement[0]
                replacement[-1] = replacement[-1] + suffix
            lines[i:i+1] = replacement
            i += len(replacement)
            continue

        i += 1
def processLine(i, line):
    # Per-line callback: takes the line index and the line, returns the list
    # of lines that should replace it (possibly empty).
    #
    # This is a closure: it declares fnc, footnotec and footnoteChapter
    # nonlocal and reads matchFN, mode, asterisk, reset, emitAtHeading, notes
    # and formatNotes from an enclosing scope that is not visible here, so it
    # only works when nested inside the function that defines those names.
    #
    # It first rewrites every <fn ...> reference on the line with a resolved
    # display id and link target, then decides whether the footnotes
    # accumulated in notes are emitted at this line.
    nonlocal fnc, footnotec

    # Process <fn> tags, fixing id='#' with an appropriate number
    # Loop, can be multiple on a line.
    off = 0
    while True:
        m = matchFN.search(line, off)
        if not m:
            break
        opts = m.group(1)
        args = parseTagAttributes("fn", opts, [ "id" ])
        if not "id" in args:
            fatal("<fn> does not have id attribute: " + line)
        id = args["id"]
        target = None
        if id == '#':
            # Auto-numbered reference: take the next value of the counter
            id = str(fnc)
            if mode == asterisk and fnc <= len(footnoteMarkers):
                # Asterisk mode displays a marker symbol while enough remain
                displayid = footnoteMarkers[fnc-1]
            else:
                displayid = "[" + id + "]"
            fnc += 1
        else:
            if id in footnoteMarkers:
                # NOTE: this rebinds the parameter i, which is not used
                # anywhere else in this function
                i = footnoteMarkers.index(id)
                displayid = id # Don't add the square brackets
                # The link target uses the marker's entry in footnoteMarkersText
                target = footnoteMarkersText[i]
            else:
                displayid = "[" + id + "]"
        if target == None:
            target = id
        if reset:
            # Ids restart whenever notes are emitted in reset modes, so the
            # chapter counter is appended to the target
            nonlocal footnoteChapter
            target += "_" + str(footnoteChapter)
        opts = "id='" + displayid + "' target='" + target + "'"
        l = line[:m.start(0)] + "<fn " + opts + ">"
        off = len(l)    # Start next loop after end of this
        line = l + line[m.end(0):]

    # Are we going to emit it here?
    # Always emit if we hit a genfootnotes,
    # emit when we hit a heading, but only in heading mode.
    emit = False
    if line.startswith("<genfootnotes>"):
        emit = True
        line = None # Remove the line, we don't want it!
    elif emitAtHeading:
        if line.startswith("<heading"):
            emit = True

    # If there weren't any, forget it, nothing to do
    if len(notes) == 0:
        emit = False

    if not emit:
        if line == None:
            # The <genfootnotes> line is dropped even with nothing to emit
            return []
        else:
            return [ line ]

    all = formatNotes(line)

    # If our mode is reset, then whenever we emit, we reset our counters
    if reset:
        fnc = 1
        footnotec = 1
        footnoteChapter += 1

    return all
def relocateFootnotes(block):
    """Number footnotes in ``block`` and move them to where they are emitted.

    ``block`` is a list of lines and is modified in place. Behaviour is
    selected by the ``footnote-location`` option:

    * ``none`` (or unset): footnotes stay where they are.
    * ``heading``: accumulated footnotes are emitted before each line
      starting with ``<heading``.
    * ``heading-reset`` / ``asterisk``: as ``heading``, but the counters
      restart after every emission and link targets get a chapter suffix;
      ``asterisk`` additionally displays auto-numbered ids as symbols from
      ``footnoteMarkers`` while enough remain.
    * ``marker``: footnotes are emitted only at ``<genfootnotes>`` lines.

    In every mode a ``<genfootnotes>`` line is removed and, if any footnotes
    are pending, replaced by them; footnotes still pending at the end are
    appended to ``block``. ``<fn>`` and ``<footnote>`` tags are rewritten to
    carry a resolved ``id`` and ``target``; an id of ``#`` is auto-numbered.

    Calls fatal() for an illegal option value, for a tag without an ``id``,
    and for an explicit footnote symbol used outside the reset modes.
    """
    none = 1
    heading = 2
    headingReset = 3
    marker = 4
    asterisk = 5
    options = {
        'none':none,
        'heading':heading,
        'heading-reset':headingReset,
        'marker':marker,
        'asterisk':asterisk,
    }
    tagValue = config.uopt.getopt("footnote-location")
    if tagValue == '':
        mode = none
    elif tagValue in options:
        mode = options[tagValue]
    else:
        # The list includes every key of options (asterisk was missing)
        fatal("footnote-location option " + tagValue + " is not legal.  Valid values are: none, heading, heading-reset, marker, asterisk.")

    notes = []            # footnote blocks waiting to be emitted
    fnc = 1               # next auto number for <fn> references
    footnotec = 1         # next auto number for <footnote> blocks
    footnoteChapter = 1   # suffix keeping targets unique after a reset
    reset = (mode == headingReset or mode == asterisk)
    emitAtHeading = (mode == heading or reset)
    matchFN = re.compile(r"<fn\s+(.*?)/?>")

    # When we hit a footnote block, accumulate it, and clear it from
    # the current text.
    # Handle auto-incrementing footnote ids
    def footnoteRelocate(opts, block):
        nonlocal footnotec, notes
        opts = opts.strip()
        # Parse and recreate the footnote tag, to handle autonumbering footnotes
        args = parseTagAttributes("footnote", opts, [ "id" ])
        if "id" not in args:
            fatal("<footnote> does not have id attribute: " + opts)
        id = args["id"]
        target = None
        if id == '#':
            id = str(footnotec)
            if mode == asterisk and footnotec <= len(footnoteMarkers):
                displayid = footnoteMarkers[footnotec-1]
            else:
                displayid = "[" + id + "]"
            footnotec += 1
        else:
            if id in footnoteMarkers:
                markerIndex = footnoteMarkers.index(id)
                displayid = id # Don't add the square brackets
                target = footnoteMarkersText[markerIndex]
                if not reset:
                    fatal("Use of explicit footnote symbols requires footnote-location to be set to either asterisk or heading-reset: " + str(opts))
            else:
                displayid = "[" + id + "]"
        if target is None:
            target = id
        if reset:
            target += "_" + str(footnoteChapter)
        opts = "id='" + displayid + "' target='" + target + "'"

        # Handle fn tags inside footnotes!
        relocateFootnotes(block)

        # Recreate the block
        block.insert(0, "<footnote " + opts + ">")
        block.append("</footnote>")

        # If we aren't supposed to move footnotes, do nothing
        if mode == none:
            return block

        # Otherwise accumulate them for emitting elsewhere
        notes.append(block)

        # Clear the current location of the footnote
        return []

    # Method called on every line.
    def processLine(i, line):
        nonlocal fnc, footnotec, footnoteChapter

        # Process <fn> tags, fixing id='#' with an appropriate number
        # Loop, can be multiple on a line.
        off = 0
        while True:
            m = matchFN.search(line, off)
            if not m:
                break
            opts = m.group(1)
            args = parseTagAttributes("fn", opts, [ "id" ])
            if "id" not in args:
                fatal("<fn> does not have id attribute: " + line)
            id = args["id"]
            target = None
            if id == '#':
                id = str(fnc)
                if mode == asterisk and fnc <= len(footnoteMarkers):
                    displayid = footnoteMarkers[fnc-1]
                else:
                    displayid = "[" + id + "]"
                fnc += 1
            else:
                if id in footnoteMarkers:
                    # Own name, so the line-index parameter is not clobbered
                    markerIndex = footnoteMarkers.index(id)
                    displayid = id # Don't add the square brackets
                    target = footnoteMarkersText[markerIndex]
                else:
                    displayid = "[" + id + "]"
            if target is None:
                target = id
            if reset:
                target += "_" + str(footnoteChapter)
            opts = "id='" + displayid + "' target='" + target + "'"
            l = line[:m.start(0)] + "<fn " + opts + ">"
            off = len(l)    # Start next loop after end of this
            line = l + line[m.end(0):]

        # Are we going to emit it here?
        # Always emit if we hit a genfootnotes,
        # emit when we hit a heading, but only in heading mode.
        emit = False
        if line.startswith("<genfootnotes>"):
            emit = True
            line = None # Remove the line, we don't want it!
        elif emitAtHeading:
            if line.startswith("<heading"):
                emit = True

        # If there weren't any, forget it, nothing to do
        if len(notes) == 0:
            emit = False

        if not emit:
            if line is None:
                return []
            else:
                return [ line ]

        all = formatNotes(line)

        # If our mode is reset, then whenever we emit, we reset our counters
        if reset:
            fnc = 1
            footnotec = 1
            footnoteChapter += 1

        return all

    def formatNotes(line):
        nonlocal notes

        # Emit a footnote mark, then all the footnotes, then a blank line,
        # then this current line which triggered us
        all = [ "<hr rend='footnotemark'>" ]
        for noteBlock in notes:
            all.extend(noteBlock)
            all.append("")      # Blank line between footnotes and after
        if line is not None:
            all.append(line)

        # We emitted, so clear the current footnotes
        notes = []
        return all

    parseStandaloneTagBlock(block, "footnote", footnoteRelocate, lineFunction = processLine)

    # Anything left when we get to the end of the file? i.e. last chapter?
    if len(notes) != 0:
        # extend() mutates the caller's list in place, exactly as += did
        block.extend(formatNotes(None))