def traverse(iNode):
    """Prune the tree below iNode in place and say whether iNode is kept.

    Returns True when iNode must stay in the tree and False when the
    caller should remove it. The rules are:

    * 'document', 'content' directly under 'document', and 'section' not
      directly under 'activity' are kept; their children are traversed
      recursively and every child that reports False is removed.
    * 'title' directly under 'section' is kept untouched.
    * 'exercises' is kept after being cleaned up: 'correct' nodes whose
      DOM path contains 'solution' are moved to the end of their
      enclosing 'entry', 'header'/'footer'/'shortcode' descendants are
      removed, 'problem' and 'solution' descendants are cleared, and the
      style 'columns' attribute is dropped from every 'problem-set'.
    * Anything else is rejected; a warning is written to stderr when the
      rejected node contains 'exercises' descendants.
    """
    tag = iNode.tag

    isContainer = (
        (tag == 'document')
        or ((tag == 'content') and (iNode.getparent().tag == 'document'))
        or ((tag == 'section') and (iNode.getparent().tag != 'activity'))
    )
    if isContainer:
        # getchildren() hands back a separate list, so removing children
        # from iNode while looping over it is safe.
        for child in iNode.getchildren():
            if not traverse(child):
                iNode.remove(child)
        return True

    if (tag == 'title') and (iNode.getparent().tag == 'section'):
        return True

    if tag == 'exercises':
        for correctNode in iNode.xpath('.//correct'):
            fullDomPath = get_full_dom_path(correctNode)
            if 'solution' not in fullDomPath:
                continue
            # Presumably cleared so that no trailing text travels along
            # when the node is moved below.
            correctNode.tail = ''
            # Walk up to the enclosing 'entry' element.
            entryNode = correctNode.getparent()
            while entryNode.tag != 'entry':
                entryNode = entryNode.getparent()
            if 'latex' in fullDomPath:
                # Re-label the node as 'latex' and wrap it in a fresh
                # 'correct' element appended to the entry.
                remove_ampersands(correctNode)
                correctNode.tag = 'latex'
                entryNode.append(etree.Element('correct'))
                entryNode[-1].append(correctNode)
            else:
                entryNode.append(correctNode)

        for unwantedTag in ['header', 'footer', 'shortcode']:
            for child in iNode.xpath('.//' + unwantedTag):
                child.getparent().remove(child)

        for emptiedTag in ['problem', 'solution']:
            for child in iNode.xpath('.//' + emptiedTag):
                child.clear()

        columnsAttribute = '{http://siyavula.com/cnxml/style/0.1}columns'
        for child in iNode.xpath('.//problem-set'):
            if child.attrib.get(columnsAttribute) is not None:
                del child.attrib[columnsAttribute]
        return True

    if len(iNode.xpath('.//exercises')) > 0:
        sys.stderr.write('WARNING: Deleting node that contains exercises.\n')
        # etree.tostring() returns bytes on Python 3; decode it so that the
        # concatenation with a text newline cannot raise TypeError.
        sys.stderr.write(etree.tostring(iNode).decode('utf-8') + '\n')
    return False
def __get_full_dom_path(self, iNode):
    """Return utils.get_full_dom_path() for iNode, using this object's spec."""
    return utils.get_full_dom_path(
        iNode,
        iSpec=self.spec,
    )
def find_substitutions(iText, iNode=None, iPreTailTag=None, iPostTailTag=None):
    """Propose markup for the numbers and currency amounts found in iText.

    Every match of numberAndCurrencyPattern is turned into a candidate
    element: 'currency' (wrapping a 'number') when the match starts with
    'R', otherwise 'number', which is further wrapped in 'unit_number'
    when a unit follows it. Each candidate is passed to prompt_replace()
    together with the surrounding context and two warning lengths; only
    candidates for which prompt_replace() returns a true value are kept.

    Arguments:
      iText -- the string to scan.
      iNode -- the element the text belongs to. Its tag selects LaTeX
        handling ('latex', or 'correct' with 'latex' in its DOM path) and
        decorates the context. None, or a tag of None, means that no tag
        is shown in the context.
      iPreTailTag -- when not None, shown as a closing tag right after
        the opening tag of iNode in the leading context.
      iPostTailTag -- when not None, shown as an opening tag after the
        trailing context if that context runs to the end of iText.

    Returns a list of (start, stop, newNode) tuples, where start/stop
    delimit the replaced span of iText (including any unit text).
    """
    digitChars = '.,0123456789'
    contextLength = 20
    textLength = len(iText)
    # Tolerate iNode=None (the declared default) as well as a tag of None.
    nodeTag = getattr(iNode, 'tag', None)

    oSubstitutions = []
    for match in numberAndCurrencyPattern.finditer(iText):
        preContextWarning = 0
        postContextWarning = 0
        start, stop = match.span()
        oldNumber = iText[start:stop]

        if oldNumber[0] == 'R':
            # Currency
            newNode = etree.Element('currency')
            newNode.append(etree.Element('number'))
            newNode[0].text = oldNumber[1:].replace(' ', '').replace(',', '.')
            # Record the precision when there are more than two decimals
            pos = newNode[0].text.find('.')
            if (pos != -1) and (pos < len(newNode[0].text) - 3):
                newNode.attrib['precision'] = str(len(newNode[0].text) - pos - 1)
            # Pre-context is suspicious when letters directly precede the 'R'
            while (
                (preContextWarning < contextLength)
                and (start - preContextWarning > 0)
                and ('a' <= iText[start - preContextWarning - 1].lower() <= 'z')
            ):
                preContextWarning += 1
        else:
            # Number
            newNode = etree.Element('number')
            newNode.text = oldNumber.replace(' ', '').replace(',', '.')
            # Pre-context is suspicious when digits or separators precede
            # the match, possibly with spaces in between
            offset = 0
            while (start - offset > 0) and (iText[start - offset - 1] == ' '):
                offset += 1
            if (start - offset > 0) and (iText[start - offset - 1] in digitChars):
                preContextWarning = min(contextLength, offset + 1)
                while (
                    (preContextWarning < contextLength)
                    and (start - preContextWarning > 0)
                    and (iText[start - preContextWarning - 1] in digitChars)
                ):
                    preContextWarning += 1

            if iText[stop:stop + 2] in ('st', 'nd', 'rd', 'th'):
                # Ordinal suffix: flag it instead of looking for units
                postContextWarning = 2
            else:
                unitText = None
                unitMatch = unitPattern.match(iText[stop:])
                if (nodeTag != 'latex') and (unitMatch is not None):
                    unitText = unitMatch.group().strip()
                else:
                    # Check if there are units in LaTeX mode
                    unitMatch = latexUnitPattern.match(iText[stop:])
                    if unitMatch is not None:
                        unitText = unitMatch.group().strip()
                        if unitText == r'\ell':
                            unitText = u'ℓ'
                        elif unitText != u'°':
                            # Everything else is expected to be \textrm{...}
                            assert unitText[:8] == r'\textrm{'
                            assert unitText[-1] == '}'
                            unitText = unitText[8:-1].strip()
                    elif lazyMode and (
                        (nodeTag == 'latex')
                        or ((nodeTag == 'correct') and ('latex' in get_full_dom_path(iNode)))
                    ):
                        # Skip unit-less numbers that are small integers
                        # and in LaTeX mode
                        try:
                            value = int(oldNumber.replace(' ', '').replace(',', '.'))
                        except ValueError:
                            pass
                        else:
                            if abs(value) < 1000:
                                continue

                if unitMatch is not None:
                    # Wrap the number and its unit and extend the span so
                    # that the unit text is replaced as well
                    unitLength = len(unitMatch.group())
                    numberNode = newNode
                    newNode = etree.Element('unit_number')
                    newNode.append(numberNode)
                    newNode.append(etree.Element('unit'))
                    newNode[-1].text = unitText
                    oldNumber += iText[stop:stop + unitLength]
                    stop += unitLength

        # Check if the post-context is funny (for both currency and number).
        # NOTE(review): the scan starts at stop+1, so the character directly
        # after the match is not inspected; kept as in the original, confirm
        # that this is intended.
        if postContextWarning == 0:
            offset = 0
            while (stop + offset + 1 < textLength) and (iText[stop + offset + 1] == ' '):
                offset += 1
            if (stop + offset + 1 < textLength) and (iText[stop + offset + 1] in digitChars):
                postContextWarning = min(contextLength, offset + 1)
                while (
                    (postContextWarning < contextLength)
                    and (stop + postContextWarning + 1 < textLength)
                    and (iText[stop + postContextWarning + 1] in digitChars)
                ):
                    postContextWarning += 1

        # Leading context: opening tag (if any) plus up to contextLength
        # characters before the match
        if nodeTag is not None:
            preContext = '<' + nodeTag + '>'
            if iPreTailTag is not None:
                preContext += ' ... </' + iPreTailTag + '>'
        else:
            preContext = ''
        if start < contextLength:
            preContext += iText[:start]
        else:
            preContext += ' ...' + iText[start - contextLength:start]

        # Trailing context: up to contextLength characters after the match
        # plus the closing tag (if any)
        if stop + contextLength > textLength:
            postContext = iText[stop:]
            if iPostTailTag is not None:
                postContext += '<' + iPostTailTag + '>' + ' ... '
        else:
            postContext = iText[stop:stop + contextLength] + '... '
        if nodeTag is not None:
            # Mirrors the guard on the opening tag; a None tag used to
            # raise TypeError here.
            postContext += '</' + nodeTag + '>'

        context = [preContext, postContext]
        newMarkup = etree.tostring(newNode, encoding='utf-8').decode('utf-8')
        if prompt_replace(oldNumber, newMarkup, context, (preContextWarning, postContextWarning)):
            oSubstitutions.append((start, stop, newNode))
    return oSubstitutions