def test_process_same_sub_level():
    """Markers reused at several depths end up under the expected parents.

    The fixture repeats the letters a-d both directly under "1"/"2" and
    again under "2.a.ii"; the assertions pin the resulting hierarchy.
    """
    paragraph_texts = (
        "1. 1 1 1",
        "a. 1a 1a 1a",
        "b. 1b 1b 1b",
        "c. 1c 1c 1c",
        "d. 1d 1d 1d",
        "e. 1e 1e 1e",
        "f. 1f 1f 1f",
        "2. 2 2 2",
        "a. 2a 2a 2a",
        "i. 2ai 2ai 2ai",
        "ii. 2aii 2aii 2aii",
        "a. 2aiia 2aiia 2aiia",
        "b. 2aiib 2aiib 2aiib",
        "c. 2aiic 2aiic 2aiic",
        "d. 2aiid 2aiid 2aiid",
        "b. 2b 2b 2b",
    )
    with XMLBuilder('APPENDIX') as builder:
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        for paragraph_text in paragraph_texts:
            builder.P(paragraph_text)

    root = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    # Top level: the two numbered paragraphs.
    assert root.child_labels == ['1', '2']
    assert root['1'].child_labels == ['a', 'b', 'c', 'd', 'e', 'f']

    # Second numbered paragraph and its nested levels.
    second = root['2']
    assert second.child_labels == ['a', 'b']

    second_a = second['a']
    assert second_a.child_labels == ['i', 'ii']
    assert second_a['i'].children == []
    assert second_a['ii'].child_labels == ['a', 'b', 'c', 'd']

    assert second['b'].children == []
def test_process_appendix_header_is_paragraph():
    """Headers that look like paragraph markers are treated as paragraphs.

    "(A) First Subkind" and "(B) Next Subkind" are HD3 elements but are
    expected as text nodes labelled A and B, while "I. Remains Header" is
    expected to stay a header node (label ``h1``, exposed via ``title``).
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.HD("A-1 - First kind of awesome", SOURCE='HD2')
        builder.HD("(A) First Subkind", SOURCE='HD3')
        builder.P("1. Content")
        builder.HD("(B) Next Subkind", SOURCE='HD3')
        builder.P("1. Moar Contents")
        builder.HD("I. Remains Header", SOURCE='HD3')
        builder.P("1. Content tent")

    root = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert root.label == ['1111', 'A']
    assert root.child_labels == ['1']

    section = root['1']
    assert section.child_labels == ['A', 'B']
    assert section.title == 'A-1 - First kind of awesome'

    sub_a = section['A']
    assert sub_a.child_labels == ['1']
    assert sub_a.text == '(A) First Subkind'
    assert sub_a['1'].text == '1. Content'

    sub_b = section['B']
    assert sub_b.child_labels == ['1']
    assert sub_b.text == '(B) Next Subkind'

    sub_b_first = sub_b['1']
    assert sub_b_first.text == '1. Moar Contents'
    assert sub_b_first.child_labels == ['h1']

    remaining_header = sub_b_first['h1']
    assert remaining_header.title == 'I. Remains Header'
    assert remaining_header.child_labels == ['1']
    assert remaining_header['1'].text == '1. Content tent'
def test_process_appendix_fp_dash():
    """An FP with SOURCE="FP-DASH" yields one child ending in underscores."""
    with XMLBuilder("APPENDIX") as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE="HED")
        builder.FP("FP-DASH filled out with dashes", SOURCE="FP-DASH")

    root = appendices.process_appendix(builder.xml, 1111)

    assert len(root.children) == 1
    dashed_text = root.children[0].text.strip()
    assert dashed_text == 'FP-DASH filled out with dashes_____'
def test_process_notes():
    """A NOTE element becomes a single fenced ``note`` block.

    Each inner element's text (P, E, P) is expected on its own line
    between the opening and closing fences.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        with builder.NOTE():
            builder.P("Par")
            builder.E("Emem")
            builder.P("Parparpar")

    root = appendices.process_appendix(builder.xml, 1111)

    assert root.label == ['1111', 'A']
    assert len(root.children) == 1

    note_node = root.children[0]
    assert note_node.text == '```note\nPar\nEmem\nParparpar\n```'
def test_process_code():
    """A CODE element becomes a single fenced block tagged with its LANGUAGE.

    The expected body is rebuilt from the fixture itself: the stripped text
    of every P and FP element, joined by newlines.
    """
    with XMLBuilder('APPENDIX') as ctx:
        ctx.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        with ctx.CODE(LANGUAGE='scala'):
            ctx.P("// Non-tail-recursive list reverse")
            # Attribute name fixed: it was misspelled "SOUURCE", which left
            # this FP without the SOURCE attribute its siblings carry.
            ctx.FP("def rev[A](lst: List[A]):List[A] =", SOURCE='FP-2')
            ctx.FP("lst match {", SOURCE='FP-2')
            ctx.FP(" case Nil => Nil", SOURCE='FP-2')
            ctx.FP(" case head :: tail =>", SOURCE='FP-2')
            ctx.FP(" rev(tail) ++ List(head)", SOURCE='FP-2')
            ctx.FP("}", SOURCE='FP-2')

    appendix = appendices.process_appendix(ctx.xml, 1111)

    assert appendix.label == ['1111', 'A']
    assert len(appendix.children) == 1

    code = appendix.children[0]
    text = "\n".join(p.text.strip() for p in ctx.xml.xpath("//P | //FP"))
    assert code.text == "```scala\n" + text + "\n```"
def test_header_ordering():
    """Out-of-order header levels still nest in document order.

    Under the first HD1 an HD3 comes before an HD2; the assertions expect
    a chain h1 -> h2 -> p1 rather than two sibling headers.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.HD("A-1 Content", SOURCE='HD1')
        builder.HD("Level 1", SOURCE='HD3')
        builder.HD("Level 2", SOURCE='HD2')  # Note HD3 then HD2
        builder.P("Paragraph")
        builder.HD("A-1(A) More Content", SOURCE='HD1')
        builder.P("A1A Paragraph")

    root = NodeAccessor(appendices.process_appendix(builder.xml, 1111))
    assert root.child_labels == ['1', '1(A)']

    first_section = root['1']
    assert first_section.child_labels == ['h1']

    outer_header = first_section['h1']
    assert outer_header.child_labels == ['h2']

    inner_header = outer_header['h2']
    assert inner_header.child_labels == ['p1']
    assert inner_header['p1'].children == []
def test_process_appendix_header_depth():
    """A header between two numbered paragraphs nests under the first one.

    The second numbered paragraph is expected back at the top level with
    no children of its own.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.P("1. Some content")
        builder.HD("An Interruption", SOURCE='HD3')
        builder.P("Moo")
        builder.P("2. More content")

    root = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert root.label == ['1111', 'A']
    assert root.child_labels == ['1', '2']

    first = root['1']
    assert first.child_labels == ['h1']
    assert first.text == '1. Some content'

    second = root['2']
    assert second.children == []
    assert second.text == '2. More content'
def test_process_spaces():
    """Text around inline elements is joined with exactly one space.

    Covers text split by a PRTPAGE element and text followed by an E
    element, both with and without a trailing space in the source.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.child_from_string(
            '<P>1. For<PRTPAGE P="650" />example</P>')
        with builder.P("2. And "):
            builder.E("et seq.", T="03")
        with builder.P("3. And"):
            builder.E("et seq.", T="03")
        builder.child_from_string(
            '<P>More<PRTPAGE P="651" />content</P>')
        with builder.P("And"):
            builder.E("et seq.", T="03")

    root = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert root.child_labels == ['1', '2', '3', 'p1', 'p2']
    # Every paragraph is a leaf.
    assert all(node.children == [] for node in root.children)

    expected_texts = (
        ('1', '1. For example'),
        ('2', '2. And et seq.'),
        ('3', '3. And et seq.'),
        ('p1', 'More content'),
        ('p2', 'And et seq.'),
    )
    for label, expected in expected_texts:
        assert root[label].text == expected
def parse_appendix(xml, cfr_part, letter):
    """Attempt to parse an appendix. Used when the entire appendix has been
    replaced/added or when we can use the section headers to determine our
    place. If the format isn't what we expect, display a warning.

    :param xml: element tree to search (presumably an lxml element, given
        the ``xpath``/``getnext`` calls). It is deep-copied first, so the
        caller's tree is not modified.
    :param cfr_part: part number used to locate the appendix header and
        passed through to ``process_appendix``.
    :param letter: appendix letter used to locate the appendix header.
    :returns: the result of ``process_appendix`` on the EXTRACT following
        the header, or ``None`` (after logging a warning) when exactly one
        matching header followed by an EXTRACT cannot be found.
    """
    xml = deepcopy(xml)
    hds = xml.xpath('//HD[contains(., "Appendix {0} to Part {1}")]'.format(
        letter, cfr_part))

    if not hds:
        logger.warning("Could not find Appendix %s to part %s",
                       letter, cfr_part)
        return None
    if len(hds) > 1:
        logger.warning("Too many headers for %s to part %s",
                       letter, cfr_part)
        return None

    hd = hds[0]
    hd.set('SOURCE', 'HED')
    extract = hd.getnext()
    if extract is None or extract.tag != 'EXTRACT':
        logger.warning("Bad format for whole appendix")
        return None

    # Move the header inside the EXTRACT so both are processed together.
    extract.insert(0, hd)
    # Drop the first AMDPAR and everything after it. Iterate over a snapshot
    # (list(extract) replaces the deprecated getchildren()) because children
    # are removed from ``extract`` during the loop.
    for trailing in dropwhile(lambda n: n.tag != 'AMDPAR', list(extract)):
        extract.remove(trailing)
    return process_appendix(extract, cfr_part)
def test_process_appendix():
    """Integration test for appendices.

    Builds an appendix mixing intro text, nested headers, inline emphasis,
    a graphic, a table and "A-n" style headers, then checks the labels,
    titles and text of the resulting tree.
    """
    with XMLBuilder("APPENDIX") as ctx:
        ctx.EAR("Pt. 1111, App. A")
        ctx.HD("Appendix A to Part 1111-Awesome", SOURCE="HED")
        ctx.P("Intro text")
        ctx.HD("Header 1", SOURCE="HD1")
        ctx.P("Content H1-1")
        ctx.P("Content H1-2")
        ctx.HD("Subheader", SOURCE="HD2")
        ctx.P("Subheader content")
        with ctx.HD("Header ", SOURCE="HD1"):
            ctx.E("2", T="03")
        ctx.P("www.example.com")
        with ctx.P("Final "):
            ctx.E("Content", T="03")
        with ctx.GPH():
            ctx.PRTPAGE(P="650")
            ctx.GID("MYGID")
        with ctx.GPOTABLE(CDEF="s50,15,15", COLS="3", OPTS="L2"):
            with ctx.BOXHD():
                with ctx.CHED("For some reason", H="1"):
                    ctx.LI("lis")
                ctx.CHED("column two", H="2")
                ctx.CHED("a third column", H="2")
            with ctx.ROW():
                ctx.ENT("0", I="01")
                ctx.ENT()
                ctx.ENT("Content3")
            with ctx.ROW():
                ctx.ENT("Cell 1")
                ctx.ENT("Cell 2")
                ctx.ENT("Cell 3")
        ctx.FP("A-3 Some header here", SOURCE="FR-1")
        ctx.P("Content A-3")
        ctx.P("A-4 Another header")
        ctx.P("Content A-4")

    appendix = appendices.process_appendix(ctx.xml, 1111)
    appendix = NodeAccessor(appendix)

    assert appendix.child_labels == ['p1', 'h1', 'h3', '3', '4']
    assert appendix['p1'].children == []
    assert appendix['p1'].text == "Intro text"

    assert appendix['h1'].child_labels == ['p2', 'p3', 'h2']
    assert appendix['h1'].title == 'Header 1'
    assert appendix['h1']['p2'].children == []
    assert appendix['h1']['p2'].text == 'Content H1-1'
    assert appendix['h1']['p3'].children == []
    assert appendix['h1']['p3'].text == 'Content H1-2'
    assert appendix['h1']['h2'].child_labels == ['p4']
    assert appendix['h1']['h2'].title == 'Subheader'
    assert appendix['h1']['h2']['p4'].text == 'Subheader content'

    assert appendix['h3'].child_labels == ['p5', 'p6', 'p7', 'p8']
    assert appendix['h3'].title == 'Header 2'
    assert appendix['h3']['p5'].text == 'www.example.com'
    assert appendix['h3']['p6'].text == 'Final Content'
    assert appendix['h3']['p7'].text == '![](MYGID)'

    table_lines = appendix['h3']['p8'].text.split('\n')
    # The header literal must sit on one line: entries produced by
    # split('\n') can never contain a line break.
    assert table_lines[0] == (
        '|For some reason lis|column two|a third column|')
    assert table_lines[1] == '|---|---|---|'
    assert table_lines[2] == '|0||Content3|'
    assert table_lines[3] == '|Cell 1|Cell 2|Cell 3|'

    assert appendix['3'].title == 'A-3 Some header here'
    assert appendix['4'].title == 'A-4 Another header'