def test_dot_export_including_annotation(self):
    """Check the DOT export of two aligned witnesses with annotations on.

    Two small XML witnesses are tokenized, aligned, converted into a text
    graph and exported with ``annotations=True``; the result is compared
    against a fixed DOT string.
    """
    # NOTE(review): the whitespace inside this literal is reproduced exactly
    # as it appears in the reviewed source; confirm it against the real
    # output of export_as_dot (original line breaks may have been lost).
    expected_out = """strict digraph TextGraph { 1 [label="x"] 2 [label="y"] 3 [label="z"] 1 -> 2 2 -> 3 { rank=same; 1; 2; 3 } a1 [label="tei"] a2 [label="s1"] a3 [label="s2"] a4 [label="s3"] a2 -> a1 a3 -> a1 1 -> a2 1 -> a3 a2 -> a1 2 -> a2 a2 -> a1 a4 -> a1 3 -> a2 3 -> a4 }"""
    # TODO: There are some duplicate annotation edges here that should be
    # removed! (a2 -> a1)
    # NOTE: For now we work around the problem by adding the "strict"
    # keyword to the DOT export.

    first_tokens = convert_xml_string_into_tokens("<tei><s1>x y z</s1></tei>")
    second_tokens = convert_xml_string_into_tokens(
        "<tei><s2>x</s2>y<s3>z</s3></tei>"
    )
    aligned = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    graph = convert_superwitness_to_textgraph(aligned)

    actual_out = export_as_dot(graph, annotations=True)

    self.assertEqual(expected_out, actual_out)
def collate_xml(witness_a, witness_b):
    """Align two XML witnesses and display the resulting text graph as SVG.

    Both witness strings are tokenized, aligned into a superwitness,
    converted into a text graph and exported as DOT with annotations
    enabled. The DOT text is rendered through ``Source`` using
    ``format="svg"`` and the render result is passed to ``display``
    wrapped in ``SVG``.

    Args:
        witness_a: XML string of the first witness.
        witness_b: XML string of the second witness.

    Returns:
        Whatever ``display`` returns for the rendered SVG.
    """
    aligned = align_tokens_and_return_superwitness(
        convert_xml_string_into_tokens(witness_a),
        convert_xml_string_into_tokens(witness_b),
    )
    graph = convert_superwitness_to_textgraph(aligned)
    dot_text = export_as_dot(graph, annotations=True)

    # render() output is handed straight to SVG(), as in the original flow.
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
def collate_xml_svg(limit=1000):
    """Collate the two small test transcriptions and display them as SVG.

    The two XML files under ``xml_source_transcriptions/`` are tokenized
    and aligned; the first 20 entries of the superwitness are printed for
    inspection. The text graph is then exported as DOT with annotations,
    passing ``limit`` through to ``export_as_dot``, rendered as SVG and
    displayed.

    Args:
        limit: Forwarded unchanged to ``export_as_dot`` (default 1000).

    Returns:
        Whatever ``display`` returns for the rendered SVG.
    """
    # Tokenize both XML transcriptions (paths are relative to the CWD).
    fol_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/ts-fol-test-small.xml"
    )
    tsq_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/tsq-test-small.xml"
    )

    aligned = align_tokens_and_return_superwitness(fol_tokens, tsq_tokens)
    print(aligned[0:20])

    graph = convert_superwitness_to_textgraph(aligned)
    dot_text = export_as_dot(graph, annotations=True, limit=limit)

    # Render the DOT text as SVG and show it.
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
def collate_xml_example():
    """Run the collation pipeline on the small test files and print each stage.

    Prints, in order: the tokens of both witnesses, the aligned
    superwitness, and the DOT export (with annotations) of the text graph.
    Nothing is returned.
    """
    # Tokenize both XML transcriptions (paths are relative to the CWD).
    fol_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/ts-fol-test-small.xml"
    )
    tsq_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/tsq-test-small.xml"
    )

    # Log the raw token streams.
    for token_stream in (fol_tokens, tsq_tokens):
        print(token_stream)

    aligned = align_tokens_and_return_superwitness(fol_tokens, tsq_tokens)
    # Log the alignment result.
    print(aligned)

    graph = convert_superwitness_to_textgraph(aligned)
    print(export_as_dot(graph, annotations=True))
def test_dot1_textnodes_only(self):
    """Check the DOT export of two aligned witnesses without annotations.

    Two small XML witnesses are tokenized, aligned, converted into a text
    graph and exported with the default arguments of ``export_as_dot``;
    the result is compared against a fixed DOT string.
    """
    # NOTE(review): the whitespace inside this literal is reproduced exactly
    # as it appears in the reviewed source; confirm it against the real
    # output of export_as_dot (original line breaks may have been lost).
    expected_out = """strict digraph TextGraph { 1 [label="x"] 2 [label="y"] 3 [label="z"] 1 -> 2 2 -> 3 { rank=same; 1; 2; 3 } }"""

    first_tokens = convert_xml_string_into_tokens("<tei><s>x y z</s></tei>")
    second_tokens = convert_xml_string_into_tokens(
        "<tei><s>x</s>y<s>z</s></tei>"
    )
    aligned = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    graph = convert_superwitness_to_textgraph(aligned)

    actual_out = export_as_dot(graph)

    self.assertEqual(expected_out, actual_out)