def handle_caps(element):
    """Handle small caps.

    Scan the "val" attribute of *element* for markers made of a degree sign
    (U+00B0) followed by a run of characters, and turn each marked run 'xxx'
    into a '<span class="sc">xxx</span>' child of *element*. The marked run
    ends at the first whitespace character or at one of ``. , ) + / :``.

    Text preceding the first marker becomes ``element.text``; text between
    or after markers is stored in the ``tail`` of the preceding span. When
    the value contains no marker at all, *element* is returned unchanged
    (its text is left as it was).

    Args:
        element: Element whose ``attrib["val"]`` holds the marked-up text.

    Returns:
        The same element object, modified in place.

    Raises:
        KeyError: If *element* has no "val" attribute.
    """
    import re

    # The value is matched as text, not as encoded bytes: a byte-level
    # character class would treat each byte of the multi-byte degree sign
    # separately and could cut other non-ASCII characters in half.
    # The degree sign is written as an escape inside a unicode literal so
    # the pattern does not depend on the encoding of this source file.
    # DOTALL lets the trailing group keep text located after a newline.
    pattern = re.compile(
        u"([^\u00b0]*)\u00b0([^\\s\\.,)+/:]*)(.*)", re.DOTALL
    )

    match = pattern.match(element.attrib["val"])
    previous_span = None
    index = 0
    while match:
        before, small_caps, after = match.groups()

        # Leading text belongs to the element itself for the first marker,
        # and to the tail of the previous span for the following ones.
        if previous_span is None:
            element.text = before
        else:
            previous_span.tail = before

        span = Element("span")
        span.attrib["class"] = "sc"
        span.text = small_caps
        element.insert(index, span)

        # Look for the next marker in what is left; if there is none, the
        # leftover text is the tail of the span just created.
        match = pattern.match(after)
        if not match:
            span.tail = after

        previous_span = span
        index += 1
    return element
def handle_font(element):
    """Replace '{xxx}' by '<span class="ipa">xxx</span>'.

    Scan the "val" attribute of *element* for brace-delimited runs and turn
    each one into a span child of class "ipa" inserted into *element*.

    Text preceding the first '{xxx}' becomes ``element.text``; text between
    or after such runs is stored in the ``tail`` of the preceding span. The
    text in front of a run must not itself contain a brace: scanning stops
    at the first stray '}' or unclosed '{', and whatever is left is kept
    verbatim as trailing text. When nothing matches at all, *element* is
    returned unchanged (its text is left as it was).

    Args:
        element: Element whose ``attrib["val"]`` holds the marked-up text.

    Returns:
        The same element object, modified in place.

    Raises:
        KeyError: If *element* has no "val" attribute.
    """
    import re

    # DOTALL lets the trailing group keep text located after a newline;
    # without it everything past the first newline would be dropped.
    pattern = re.compile(r"([^{}]*)\{([^}]*)\}(.*)", re.DOTALL)

    match = pattern.match(element.attrib["val"])
    previous_span = None
    index = 0
    while match:
        before, ipa, after = match.groups()

        # Leading text belongs to the element itself for the first run,
        # and to the tail of the previous span for the following ones.
        if previous_span is None:
            element.text = before
        else:
            previous_span.tail = before

        span = Element("span")
        span.attrib["class"] = "ipa"
        span.text = ipa
        element.insert(index, span)

        # Look for the next run in what is left; if there is none, the
        # leftover text is the tail of the span just created.
        match = pattern.match(after)
        if not match:
            span.tail = after

        previous_span = span
        index += 1
    return element
def handle_pinyin(element):
    """Replace '@xxx' by '<span class="pinyin">xxx</span>'.

    Scan the "val" attribute of *element* for '@' markers and turn the run
    of word characters following each marker into a span child of class
    "pinyin" inserted into *element*.

    Text preceding the first marker becomes ``element.text``; text between
    or after markers is stored in the ``tail`` of the preceding span. When
    the value contains no '@', *element* is returned unchanged (its text is
    left as it was).

    Args:
        element: Element whose ``attrib["val"]`` holds the marked-up text.

    Returns:
        The same element object, modified in place.

    Raises:
        KeyError: If *element* has no "val" attribute.
    """
    import re

    # DOTALL lets the trailing group keep text located after a newline;
    # without it everything past the first newline would be dropped.
    pattern = re.compile(r"([^@]*)@(\w*)(.*)", re.DOTALL)

    match = pattern.match(element.attrib["val"])
    previous_span = None
    index = 0
    while match:
        before, pinyin, after = match.groups()

        # Leading text belongs to the element itself for the first marker,
        # and to the tail of the previous span for the following ones.
        if previous_span is None:
            element.text = before
        else:
            previous_span.tail = before

        span = Element("span")
        span.attrib["class"] = "pinyin"
        span.text = pinyin
        element.insert(index, span)

        # Look for the next marker in what is left; if there is none, the
        # leftover text is the tail of the span just created.
        match = pattern.match(after)
        if not match:
            span.tail = after

        previous_span = span
        index += 1
    return element
def handle_fv(element):
    """Replace 'fv:xxx' and '|fv{xxx}' by '<span class="vernacular">xxx</span>'.

    Scan the "val" attribute of *element* for the two vernacular notations
    and turn each marked run into a span child of class "vernacular"
    inserted into *element*:

    * ``fv:xxx`` -- the run ends at the first whitespace character or at
      one of ``. , )``;
    * ``|fv{xxx}`` -- the run is everything up to the closing brace.

    Text preceding the first marker becomes ``element.text``; text between
    or after markers is stored in the ``tail`` of the preceding span. The
    text in front of a marker must not contain ':' or '|': scanning stops
    as soon as that is not the case, and whatever is left is kept verbatim
    as trailing text. When nothing matches at all, *element* is returned
    unchanged (its text is left as it was).

    Args:
        element: Element whose ``attrib["val"]`` holds the marked-up text.

    Returns:
        The same element object, modified in place.

    Raises:
        KeyError: If *element* has no "val" attribute.
    """
    import re

    # Groups 1-4 describe the 'fv:xxx' form, groups 5-8 the '|fv{xxx}' form.
    # DOTALL lets the trailing groups keep text located after a newline;
    # without it everything past the first newline would be dropped.
    pattern = re.compile(
        r"(([^:\|]*)fv:([^\s\.,)]*)(.*))|(([^:\|]*)\|fv\{([^}]*)\}(.*))",
        re.DOTALL,
    )

    match = pattern.match(element.attrib["val"])
    previous_span = None
    index = 0
    while match:
        # Exactly one of the two alternatives took part in the match.
        if match.group(1) is not None:
            before, vernacular, after = match.group(2, 3, 4)
        else:
            before, vernacular, after = match.group(6, 7, 8)

        # Leading text belongs to the element itself for the first marker,
        # and to the tail of the previous span for the following ones.
        if previous_span is None:
            element.text = before
        else:
            previous_span.tail = before

        span = Element("span")
        span.attrib["class"] = "vernacular"
        span.text = vernacular
        element.insert(index, span)

        # Look for the next marker in what is left; if there is none, the
        # leftover text is the tail of the span just created.
        match = pattern.match(after)
        if not match:
            span.tail = after

        previous_span = span
        index += 1
    return element