def ensurenl(i, e):
    ''' Ensure element ends with a newline, if not already present.

        i is the index of e within its parent; e is an sfm.Element or a
        text node.

        For an element with children the check is delegated to its last
        child. An empty element gets a newline text node appended to it. A
        text node that does not end in a newline gets a newline text node
        inserted immediately after it in its parent.

        Returns True when this call itself added a newline node (to an empty
        element, or after a text node), otherwise False. The result of the
        delegated call on a last child is not propagated. '''
    if isinstance(e, sfm.Element):
        if len(e):
            ensurenl(len(e) - 1, e[-1])
        else:
            # The new node becomes a child of e, so e (not e.parent) is its
            # parent.
            e.append(sfm.Text("\n", e.pos, e))
            return True
    elif not e.endswith("\n"):
        e.parent.insert(i + 1, sfm.Text("\n", e.pos, e.parent))
        return True
    return False
def _insertVerse(self, job, parent, offset, e):
    ''' Insert job into parent at offset, keeping trailing whitespace after it.

        job is the element to insert; its parent attribute is set to parent.
        offset >= 0 inserts at that index; a negative offset appends to the
        end of parent (callers in this file pass -1). e only supplies the
        source position for any whitespace text node created here.

        If the node preceding the insertion point is a text node ending in a
        space or newline, its trailing whitespace is stripped off and
        re-inserted as a separate text node after job.

        Returns the index just past the inserted node(s) when offset >= 0,
        or the unchanged negative offset when the nodes were appended. '''
    job.parent = parent
    trailing = None
    if offset != 0:
        # For a positive offset look at the node just before the insertion
        # point; for a negative offset index directly from the end.
        prev = parent[offset - 1] if offset > 0 else parent[offset]
        # Guard against empty text: stripping an all-whitespace node below
        # leaves a node whose data is "", and indexing [-1] on that would
        # raise IndexError on a later call.
        if isinstance(prev, sfm.Text) and prev.data \
                and prev.data[-1] in " \n":
            stripped = prev.data.rstrip()
            trailing = prev.data[len(stripped):]
            prev.data = stripped
    if offset >= 0:
        parent.insert(offset, job)
        offset += 1
    else:
        parent.append(job)
    if trailing is not None:
        t = sfm.Text(trailing, pos=e.pos, parent=parent)
        if offset > 0:
            parent.insert(offset, t)
            offset += 1
        else:
            parent.append(t)
    return offset
def versesToEnd(self):
    ''' For every v element create a vp element carrying the same first
        argument and place it later in the document.

        Walks the (index, node) pairs produced by self.iiterel(0, self.doc[0]).
        Each v element produces a pending vp element (currjob). The pending
        element is inserted with self._insertVerse when the next v element or
        the next non-versetext paragraph is reached, and whatever is still
        pending after the walk is appended, followed by a newline, to the
        last verse-text paragraph seen. The document is modified in place. '''
    it = self.iiterel(0, self.doc[0])
    currjob = None      # vp element built for the most recent v, not yet inserted
    lastpara = None     # most recent versetext paragraph other than the current node
    thispara = None     # versetext paragraph currently being walked, if any
    for i, e in it:
        # NOTE(review): i is presumably the index of e within its parent --
        # confirm against iiterel.
        if not isinstance(e, sfm.Element):
            continue
        etype = e.meta.get('texttype', '').lower()
        style = e.meta.get('styletype', '').lower()
        if style == 'paragraph' and etype == 'versetext':
            # if e.parent is not None and e.parent.name == 'p':
            #     print(e)
            thispara = e
            # offset accumulates how far earlier insertions into this
            # paragraph have shifted the indices reported by the iterator.
            offset = 0
        elif style == 'paragraph':
            # A paragraph that is not verse text: flush the pending vp onto
            # the end of the previous verse-text paragraph.
            # NOTE(review): lastpara may still be None here -- confirm a v
            # cannot precede the first verse-text paragraph.
            if currjob is not None:
                self._insertVerse(currjob, lastpara, -1, e)
                currjob = None
            thispara = None
            offset = 0
        if e.name == 'v':
            if currjob is not None:
                if thispara is not None and i > 1:
                    # Insert the previous verse's vp just before this v and
                    # remember the resulting index shift.
                    offset = self._insertVerse(currjob, thispara, i + offset, e) - i
                else:
                    self._insertVerse(currjob, lastpara, -1, e)
            currjob = sfm.Element('vp', e.pos, parent=e.parent, meta=self.sheets['vp'])
            currjob.append(sfm.Text(e.args[0], pos=e.pos, parent=currjob))
        if thispara is not None and thispara is not e:
            lastpara = thispara
    if currjob is not None:
        lastpara.append(currjob)
        lastpara.append(sfm.Text("\n", parent=lastpara))
def fn(e):
    ''' Apply every applicable rule in regs to the text of e.

        Each rule is indexed as (test, pattern, replacement). A rule whose
        test is not None is skipped when test(e.parent) is false. Rules are
        applied in order, each to the output of the previous one.

        Returns a new sfm.Text with the same pos and parent if any rule
        changed the text, otherwise e itself. '''
    text = str(e)
    changed = False
    for rule in regs:
        applies = rule[0]
        if applies is not None and not applies(e.parent):
            continue
        replaced = rule[1].sub(rule[2], text)
        if replaced != text:
            text = replaced
            changed = True
    return sfm.Text(text, e.pos, e.parent) if changed else e
def _g(a, e):
    ''' Fold step: copy node e into accumulator a, filtered by pred.

        Text nodes are copied into a when pred accepts them. Elements whose
        name is in removes are dropped together with their content. Any other
        element is rebuilt with its children filtered recursively; the
        rebuilt element is added to a when pred accepts the original,
        otherwise its surviving children are spliced into a directly.

        Always returns a. '''
    if isinstance(e, sfm.Text):
        if pred(e):
            a.append(sfm.Text(e, e.pos, a or None))
        return a

    if e.name in removes:
        return a

    rebuilt = sfm.Element(e.name, e.pos, e.args, parent=a or None, meta=e.meta)
    # Every call returns its accumulator, so folding over the children is the
    # same as calling _g on each child in turn with the rebuilt element.
    for child in e:
        _g(rebuilt, child)

    if pred(e):
        a.append(rebuilt)
    elif len(rebuilt):
        a.extend(rebuilt[:])
    return a
def fn(e):
    ''' Insert inschar between the characters of each matching run in e.

        Text without a parent, or whose parent fails isScriptureText, is
        returned unchanged. Otherwise the text is split into runs of
        characters whose "gc" value from get_ucd is in takslc_cats (excluding
        "|") and runs of everything else. Each matching run is joined with
        inschar, and also prefixed with inschar unless lastspace is set.

        Returns a new sfm.Text if any matching run was found, otherwise e. '''
    if not e.parent or not isScriptureText(e.parent):
        return e

    def wants_insert(ch):
        return get_ucd(ord(ch), "gc") in takslc_cats and ch != "|"

    # Starts out True for every node except the first child of its parent.
    lastspace = e.parent[0] is not e
    pieces = []
    inserted = False
    for islet, group in groupby(str(e), key=wants_insert):
        run = "".join(group)
        if not run:
            continue
        if islet:
            prefix = "" if lastspace else inschar
            pieces.append(prefix + inschar.join(run))
            inserted = True
        else:
            pieces.append(run)
        tail = run[-1]
        lastspace = (
            get_ucd(ord(tail), "InSC") in ("Invisible_Stacker", "Virama")
            or get_ucd(ord(tail), "gc") in ("Cf", "WS")
            or tail in r"\|"
        )

    if not inserted:
        return e
    return sfm.Text("".join(pieces), e.pos, e.parent)
def new_element(self, e, name, content):
    ''' Build an element called name that takes the place of e.

        The new element copies pos and parent from e, has no arguments, takes
        its meta from self.sheets[name], and contains a newline text node
        followed by the nodes in content. '''
    leading_newline = sfm.Text("\n", e.pos)
    children = [leading_newline] + content
    return sfm.Element(name, e.pos, [], e.parent,
                       content=children, meta=self.sheets[name])
def parse_element(self, e):
    ''' Expand one node of the document and return its replacement nodes.

        Text nodes have self.localise_re substituted via self.localref; a new
        text node is returned if the text changed, otherwise the node itself.

        "ref" and "refnp" elements are replaced by the passages they name.
        Immediately following sibling "rep" elements of the form
        "pattern => replacement" are consumed (deleted from the parent) and
        applied to the fetched passages. For "ref", content that comes before
        the first paragraph-style element of a passage is wrapped in a new
        paragraph ("p1" when the parent is None or an "id" element, otherwise
        "p"); a passage with no paragraph-style element is wrapped whole.

        "inc" elements update self.removes from exclusionmap, one character
        at a time: "-" resets it to every value in exclusionmap, any other
        character removes that character's entries.

        "mod" elements are replaced by the parsed content of the named module.

        Any other element has its children expanded in place.

        Returns a list of nodes. '''
    if isinstance(e, sfm.Text):
        t = self.localise_re.sub(self.localref, str(e))
        if t != e:
            return [sfm.Text(t, e.pos, e.parent)]
        return [e]
    elif e.name == "ref" or e.name == "refnp":
        res = []
        isidparent = e.parent is None or e.parent.name == "id"
        reps = []
        # An element without a parent has no siblings, so there are no "rep"
        # elements to collect; skipping also avoids dereferencing None.
        if e.parent is not None:
            curr = e.parent.index(e)
            while curr + 1 < len(e.parent):
                rep = e.parent[curr + 1]
                if not isinstance(rep, sfm.Element) or rep.name != "rep":
                    break
                # parse rep
                # Raw string: the pattern is unchanged, but "\s" in a normal
                # string literal is an invalid escape sequence.
                m = re.match(r"^\s*(.*?)\s*=>\s*(.*?)\s*$", str(rep[0]), re.M)
                if m:
                    # "..." in the search text matches any run of characters
                    # up to the end of the line.
                    reps.append(
                        (None,
                         re.compile(r"\b"
                                    + m.group(1).replace("...", "[^\n\r]+")
                                    + "(\\b|(?=\\s)|$)"),
                         m.group(2)))
                # Deleting the sibling shifts the next one into curr + 1.
                del e.parent[curr + 1]
        for r in RefList.fromStr(str(e[0]), context=self.usfms.booknames):
            p = self.get_passage(r, removes=self.removes,
                                 strippara=e.name == "refnp")
            if e.name == "ref":
                for i, t in enumerate(p):
                    # NOTE(review): the key is spelled 'StyleType' here --
                    # confirm that meta lookups are case-insensitive.
                    if isinstance(t, sfm.Element) and t.meta.get(
                            'StyleType', '').lower() == 'paragraph':
                        if i:
                            p[0:i] = [
                                self.new_element(
                                    e, "p1" if isidparent else "p", p[0:i])
                            ]
                        break
                else:
                    # No paragraph-style element at all: wrap everything.
                    p = [
                        self.new_element(e, "p1" if isidparent else "p", p)
                    ]
            res.extend(p)
        if reps:
            res = self.doc.transform_text(*reps, doc=res)
        return res
    elif e.name == 'inc':
        s = "".join(map(str, e)).strip()
        for c in s:
            if c == "-":
                self.removes = set(sum(exclusionmap.values(), []))
            else:
                self.removes.difference_update(exclusionmap.get(c, []))
    elif e.name == 'mod':
        mod = Module(e[0].strip(), self.usfms)
        return mod.parse()
    else:
        cs = sum(map(self.parse_element, e), [])
        e[:] = cs
    return [e]
def iterfn(el, top=False):
    ''' Prune v elements that have no content after them, working in place.

        Returns False when el is judged deletable (empty) and True otherwise.
        A text node is deletable when it is whitespace only, or is "..."
        followed only by whitespace. An element is processed child by child:
        deletable children are collected in predels, and a v element that is
        followed only by deletable children up to the next v (or the end) is
        removed, or replaced by a "..." text node when the enclosing
        ellipsis flag is set.

        ellipsis and self come from the enclosing scope. The top parameter
        and the local res are not used in this function. '''
    if isinstance(el, sfm.Element):
        lastv = None        # most recent v with no content seen after it yet
        predels = []        # deletable children collected since the last content
        for c in el[:]:     # iterate over a copy: el is edited during the walk
            if not isinstance(c, sfm.Element) or c.name != "v":
                if iterfn(c):  # False if deletable ~> empty
                    # c has content, so the pending verse is kept.
                    if len(predels):
                        # A trailing p holding just "..." is dropped from the
                        # removal list, so it stays in the tree.
                        if isinstance(predels[-1], sfm.Element) \
                                and predels[-1].name == "p" \
                                and len(predels[-1]) == 1 \
                                and str(predels[-1][0]).strip() == "...":
                            predels.pop(-1)
                        # Only element candidates are removed here; text
                        # candidates are left where they are.
                        for p in predels:
                            if isinstance(p, sfm.Element):
                                p.parent.remove(p)
                    lastv = None
                    predels = []
                else:
                    predels.append(c)
            elif isinstance(c, sfm.Element) and c.name == "v":
                if lastv is not None:
                    # The previous verse had no content before this one.
                    for p in predels:
                        p.parent.remove(p)
                    predels = []
                    if ellipsis:
                        # Swap the empty verse for a "..." text node at the
                        # same index, and track it as a removal candidate.
                        i = lastv.parent.index(lastv)
                        ell = sfm.Text("...", parent=lastv.parent)
                        lastv.parent.insert(i, ell)
                        predels.append(ell)
                        lastv.parent.pop(i + 1)
                    else:
                        lastv.parent.remove(lastv)
                lastv = c
        if lastv is not None:
            # The final verse had nothing with content after it.
            lastv.parent.remove(lastv)
        res = len(el) != 0
        nonemptypredels = [
            p for p in predels
            if isinstance(p, sfm.Element) or not re.match(r"^\s*$", str(p))
        ]
        ell = None
        if len(nonemptypredels):
            if ellipsis:
                # Put one ellipsis marker where the first non-blank candidate
                # was: plain text if the parent's styletype is None or
                # "paragraph", otherwise a new p element holding the text.
                p = nonemptypredels[0]
                i = p.parent.index(p)
                st = p.parent.meta.get("styletype", "")
                if st is None or st.lower() == "paragraph":
                    ell = sfm.Text("...", parent=p.parent)
                else:
                    ell = sfm.Element('p', parent=p.parent, meta=self.sheets['p'])
                    ell.append(sfm.Text("...\n", parent=ell))
                p.parent.insert(i, ell)
            for p in predels:
                p.parent.remove(p)
            predels = [ell] if ell is not None else []
        st = el.meta.get("styletype", "")
        # If every remaining child is a removal candidate, el has no content
        # of its own.
        if (st is None or st.lower() == "paragraph") and len(el) == len(predels):
            # el.parent.remove(el)
            return True if st is None else False  # To handle empty markers like \pagebreak
    elif re.match(r"^\s*$", str(el)) or re.match(
            r"\.{3}\s*$", str(el)):
        return False
    return True