def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None):
    """Collate ``collation`` into a variant graph and return it in the requested format.

    Args:
        collation: the collation handed to the aligner and to the alignment table.
        output: one of "svg", "graph", "json", "html", "html2" or "table".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph (parallel segments are joined).
        near_match: forwarded to the selected aligner.
        astar: when true, ``ExperimentalAstarAligner`` is used instead of ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.

    Returns:
        The graph itself, the alignment table, or whatever the matching
        display/export helper returns.

    Raises:
        Exception: if ``output`` is not one of the supported names.
    """
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=near_match, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=near_match,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter,
        )

    # Build the variant graph, then optionally join parallel segments.
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    # Graph-level outputs do not need an alignment table.
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    # Everything else is rendered from the alignment table; note the table is
    # built before an unknown output name is rejected.
    alignment = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment)
    elif output == "html":
        return display_alignment_table_as_HTML(alignment)
    elif output == "html2":
        return visualizeTableVerticallyWithColors(alignment, collation)
    elif output != "table":
        raise Exception("Unknown output type: " + output)
    return alignment
def test_superbase_generation_multiple_short_witnesses(self):
    """Smoke test: aligning three one-token witnesses must complete without raising."""
    collation = Collation()
    for sigil, content in (("A", "a"), ("B", "b"), ("C", "c")):
        collation.add_plain_witness(sigil, content)
    # No assertions here: the test only exercises the alignment call itself.
    EditGraphAligner(collation).collate(VariantGraph())
def test_superbase(self):
    """Check the superbase produced after aligning two longer witnesses."""
    witnesses = (
        ("A", "X a b c d e f X g h i Y Z j k"),
        ("B", "a b c Y d e f Y Z g h i X j k"),
    )
    collation = Collation()
    for sigil, content in witnesses:
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph(), collation)

    # The aligner exposes the merged token sequence as ``new_superbase``.
    self.assertSuperbaseEquals("X a b c Y d e f X Y Z g h i Y Z X j k", aligner.new_superbase)
def test_superbase_generation_multiple_short_witnesses(self):
    """Check the superbase produced after aligning three one-token witnesses."""
    collation = Collation()
    for sigil, content in (("A", "a"), ("B", "b"), ("C", "c")):
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph(), collation)

    self.assertSuperbaseEquals("a b c", aligner.new_superbase)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, indent=False):
    """Collate ``collation`` and return the result in the requested output format.

    Args:
        collation: a collation object, or a dict whose "witnesses" entries are
            each passed to ``Collation.add_witness`` to build one. No further
            validation is done.
        output: one of "svg", "svg_simple", "graph", "json", "html", "html2",
            "table", "xml", "tei", "csv" or "tsv".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph and the
            ranking is recomputed. Must be false when ``near_match`` is true.
        near_match: when true, ``perform_near_match`` is run on the graph and
            its ranking. The aligners themselves are always built with
            ``near_match=False``.
        astar: when true, ``ExperimentalAstarAligner`` replaces ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.
        indent: forwarded to the TEI exporter.

    Raises:
        SegmentationError: if ``near_match`` and ``segmentation`` are both true
            (raised after the initial alignment has been computed).
        Exception: if ``output`` is not a supported name.
    """
    # A JSON-style dict is converted into a real Collation first.
    if isinstance(collation, dict):
        built = Collation()
        for witness_spec in collation["witnesses"]:
            built.add_witness(witness_spec)
        collation = built

    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter,
        )

    # Build the graph and its initial ranking (no near-match adjustments yet).
    variant_graph = VariantGraph()
    aligner.collate(variant_graph)
    ranking = VariantGraphRanking.of(variant_graph)

    if near_match:
        # Segmentation is not supported together with near matching.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(variant_graph, ranking)

    if segmentation:
        # Joining parallel segments changes the graph, so rank it again.
        join(variant_graph)
        ranking = VariantGraphRanking.of(variant_graph)

    # Graph-level outputs.
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(variant_graph, output)
    if output == "graph":
        return variant_graph

    # Table-level outputs.
    alignment = AlignmentTable(collation, variant_graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(alignment)
    elif output == "html":
        return display_alignment_table_as_html(alignment)
    elif output == "html2":
        return visualize_table_vertically_with_colors(alignment, collation)
    elif output == "table":
        return alignment
    elif output == "xml":
        return export_alignment_table_as_xml(alignment)
    elif output == "tei":
        return export_alignment_table_as_tei(alignment, indent)
    elif output in ("csv", "tsv"):
        return display_alignment_table_as_csv(alignment, output)
    raise Exception("Unknown output type: " + output)
def testOmission2GlobalScore(self):
    """Check every row of the aligner's score table for "a a b c" against "a b c"."""
    collation = Collation()
    for sigil, content in (("A", "a a b c"), ("B", "a b c")):
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph())
    score_table = aligner.table

    expected_rows = (
        [0, -1, -2, -3, -4],
        [-1, 0, -1, -2, -3],
        [-2, -1, -2, -1, -2],
        [-3, -2, -3, -2, -1],
    )
    for row_index, expected in enumerate(expected_rows):
        self.assertRow(expected, score_table[row_index])
def testOmission2GlobalScore(self):
    """Check every row of the aligner's score table for "a a b c" against "a b c"."""
    collation = Collation()
    for sigil, content in (("A", "a a b c"), ("B", "a b c")):
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph(), collation)
    score_table = aligner.table

    expected_rows = (
        [0, -1, -2, -3, -4],
        [-1, 0, -1, -2, -3],
        [-2, -1, -2, -1, -2],
        [-3, -2, -3, -2, -1],
    )
    for row_index, expected in enumerate(expected_rows):
        self.assertRow(expected, score_table[row_index])
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            debug_scores=False):
    """Collate ``collation`` and return the result in the requested output format.

    Args:
        collation: the collation to align; its ``pretokenized`` flag and
            ``witnesses`` are read when building a tokenized table.
        output: one of "graph", "json", "html" or "table".
        layout: forwarded to ``AlignmentTable``, ``get_tokenized_at`` and the
            JSON exporter.
        segmentation: when true, ``join`` is applied to the graph. The
            tokenized table is only built when this is false.
        near_match: forwarded to ``EditGraphAligner``.
        astar: forwarded to ``EditGraphAligner``.
        debug_scores: forwarded to ``EditGraphAligner``.

    Raises:
        Exception: if ``output`` is not a supported name.
    """
    aligner = EditGraphAligner(collation, near_match=near_match, astar=astar, debug_scores=debug_scores)

    # Build the variant graph, then optionally join parallel segments.
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    if output == "graph":
        return variant_graph

    alignment = AlignmentTable(collation, variant_graph, layout)
    if collation.pretokenized and not segmentation:
        # Only done with segmentation=False; get_tokenized_at could behave
        # differently with segmentation=True.
        witness_token_data = [
            [token.token_data for token in witness.tokens()]
            for witness in collation.witnesses
        ]
        alignment = get_tokenized_at(alignment, witness_token_data, segmentation=segmentation, layout=layout)
        # For display purposes, "table" and "html" output keep only the token's
        # "t" string rather than the full token_data dict.
        # NOTE(review): placed inside the pretokenized branch because the
        # original nesting is not recoverable from the source - confirm.
        if output in ("table", "html"):
            for row in alignment.rows:
                row.cells = [cell["t"] for cell in row.cells]

    if output == "json":
        return export_alignment_table_as_json(alignment, layout=layout)
    elif output == "html":
        return display_alignment_table_as_HTML(alignment)
    elif output != "table":
        raise Exception("Unknown output type: " + output)
    return alignment
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None):
    """Align ``collation`` into a variant graph and render it as ``output``.

    Args:
        collation: the collation handed to the aligner and the alignment table.
        output: "svg", "graph", "json", "html", "html2" or "table".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, parallel segments are joined via ``join``.
        near_match: forwarded to the selected aligner.
        astar: selects ``ExperimentalAstarAligner`` over ``EditGraphAligner``.
        detect_transpositions: used by ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: used by ``EditGraphAligner`` only.

    Raises:
        Exception: for an unsupported ``output`` name.
    """
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=near_match, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=near_match,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    result_graph = VariantGraph()
    algorithm.collate(result_graph, collation)
    if segmentation:
        join(result_graph)

    # Outputs that are produced straight from the graph.
    if output == "svg":
        return display_variant_graph_as_SVG(result_graph)
    if output == "graph":
        return result_graph

    # Remaining outputs are rendered from the alignment table, which is built
    # even when the output name later turns out to be unknown.
    result_table = AlignmentTable(collation, result_graph, layout)
    if output == "table":
        return result_table
    if output == "json":
        return export_alignment_table_as_json(result_table)
    if output == "html":
        return display_alignment_table_as_HTML(result_table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(result_table, collation)
    raise Exception("Unknown output type: " + output)
def testOmission(self):
    """Check the ``g`` score of every cell in the table for "a b c" against "b c"."""
    collation = Collation()
    for sigil, content in (("A", "a b c"), ("B", "b c")):
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph(), collation)
    table = aligner.table
    # Debugging aid: self.debug_table(aligner, table)

    expected_g = (
        (0, -1, -2, -3),
        (-1, -2, -1, -2),
        (-2, -3, -2, -1),
    )
    for y, expected_row in enumerate(expected_g):
        for x, expected in enumerate(expected_row):
            self.assertEqual(expected, table[y][x].g)
def testOmission(self):
    """Check the ``g`` score of every cell in the table for "a b c" against "b c"."""
    collation = Collation()
    for sigil, content in (("A", "a b c"), ("B", "b c")):
        collation.add_plain_witness(sigil, content)

    aligner = EditGraphAligner(collation)
    aligner.collate(VariantGraph())
    table = aligner.table
    # Debugging aid: self.debug_table(aligner, table)

    expected_g = (
        (0, -1, -2, -3),
        (-1, -2, -1, -2),
        (-2, -3, -2, -1),
    )
    for y, expected_row in enumerate(expected_g):
        for x, expected in enumerate(expected_row):
            self.assertEqual(expected, table[y][x].g)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            debug_scores=False):
    """Collate ``collation`` with a ``Scorer``-driven aligner and render the result.

    Args:
        collation: the collation to align.
        output: one of "svg", "graph", "json", "html" or "table".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph.
        near_match: passed to ``Scorer`` as its second argument.
        astar: forwarded to ``EditGraphAligner``.
        debug_scores: forwarded to ``EditGraphAligner``.

    Raises:
        Exception: if ``output`` is not a supported name.
    """
    aligner = EditGraphAligner(collation, Scorer(collation, near_match), astar=astar, debug_scores=debug_scores)

    # Build the variant graph, then optionally join parallel segments.
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    # Graph-level outputs.
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    # Table-level outputs.
    alignment = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment)
    elif output == "html":
        return display_alignment_table_as_HTML(alignment)
    elif output != "table":
        raise Exception("Unknown output type: " + output)
    return alignment
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False,
            scheduler=None):
    """Collate ``collation`` and return the result in the requested output format.

    Args:
        collation: a collation object, or a dict whose "witnesses" entries are
            each passed to ``Collation.add_witness`` to build one. No further
            validation is done.
        output: one of "svg", "svg_simple", "graph", "json", "html", "html2",
            "table", "xml" or "tei".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph and the
            ranking is recomputed. Must be false when ``near_match`` is true.
        near_match: when true, ``process_rank`` is run over the ranking from
            the rank just below the end vertex downwards. The aligners
            themselves are always built with ``near_match=False``.
        astar: when true, ``ExperimentalAstarAligner`` replaces ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.
        svg_output: forwarded to the SVG renderer.
        indent: forwarded to the TEI exporter.
        scheduler: scheduler handed to ``process_rank``. When ``None`` (the
            default) a fresh ``Scheduler()`` is created for this call.

    Raises:
        SegmentationError: if ``near_match`` and ``segmentation`` are both true
            (raised after the initial alignment has been computed).
        Exception: if ``output`` is not a supported name.
    """
    # A JSON-style dict is converted into a real Collation first.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # Assume collation is a Collation by now; no error trapping.
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # Build the graph and its initial ranking.
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation is not supported for near matching.
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        # A ``Scheduler()`` default in the signature would be created once at
        # definition time and shared by every call; build one per call instead.
        if scheduler is None:
            scheduler = Scheduler()
        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)
        # Do-while shape: the ranking is modified while it is walked, so the
        # cursor is driven by hand and process_rank's return value replaces it.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if rank <= 0:
                break

    # Join parallel segments; the graph changes, so rank it again.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Graph-level outputs.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # Table-level outputs.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False,
            scheduler=None):
    """Align ``collation`` and render the result as ``output``.

    Args:
        collation: a collation object, or a dict with a "witnesses" sequence
            whose items are fed to ``Collation.add_witness``. Not validated.
        output: "svg", "svg_simple", "graph", "json", "html", "html2",
            "table", "xml" or "tei".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph and the
            ranking is recomputed. Incompatible with ``near_match``.
        near_match: when true, ``process_rank`` is applied rank by rank,
            starting just below the end vertex's rank. Both aligners are
            always constructed with ``near_match=False``.
        astar: selects ``ExperimentalAstarAligner`` over ``EditGraphAligner``.
        detect_transpositions: used by ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: used by ``EditGraphAligner`` only.
        svg_output: forwarded to the SVG renderer.
        indent: forwarded to the TEI exporter.
        scheduler: scheduler passed to ``process_rank``; ``None`` (default)
            means a new ``Scheduler()`` is created for this call.

    Raises:
        SegmentationError: when ``near_match`` and ``segmentation`` are both
            true (raised only after the initial alignment has run).
        Exception: for an unsupported ``output`` name.
    """
    # collation may be a Collation or JSON-style dict; build a real Collation from the latter.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores, properties_filter=properties_filter)

    # Build the graph and rank it.
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation is not supported for near matching.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        # Default arguments are evaluated once, at definition time, so a
        # ``Scheduler()`` default would be shared by all calls. Create the
        # scheduler here so each call gets its own.
        if scheduler is None:
            scheduler = Scheduler()
        witness_count = len(collation.witnesses)
        # Do-while: the body runs at least once and process_rank decides where
        # the walk continues, because the ranking is modified during the walk.
        rank = ranking.byVertex[graph.end] - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if rank <= 0:
                break

    # Join parallel segments and re-rank the changed graph.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Outputs rendered straight from the graph.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # Outputs rendered from the alignment table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, indent=False):
    """Collate ``collation`` and return the result in the requested output format.

    Args:
        collation: a collation object, or a dict whose "witnesses" entries are
            each passed to ``Collation.add_witness`` to build one. No further
            validation is done.
        output: one of "svg", "svg_simple", "graph", "json", "html", "html2",
            "table", "xml", "tei", "csv" or "tsv".
        layout: forwarded to ``AlignmentTable``, ``get_tokenized_at`` and the
            JSON exporter.
        segmentation: when true, ``join`` is applied to the graph and the
            ranking is recomputed. Must be false when ``near_match`` is true.
            The tokenized table is only built when this is false.
        near_match: when true, ``perform_near_match`` is run on the graph and
            its ranking. The aligners themselves are always built with
            ``near_match=False``.
        astar: when true, ``ExperimentalAstarAligner`` replaces ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.
        indent: forwarded to the TEI exporter.

    Raises:
        SegmentationError: if ``near_match`` and ``segmentation`` are both true
            (raised after the initial alignment has been computed).
        Exception: if ``output`` is not a supported name.
    """
    # A JSON-style dict is converted into a real Collation first.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # Assume collation is a Collation by now; no error trapping.
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # Build the graph and its initial ranking (no near-match adjustments yet).
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation is not supported for near matching.
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # Join parallel segments; the graph changes, so rank it again.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Graph-level outputs.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # Pass the ranking computed above to the table; otherwise the ranking
    # returned by perform_near_match would be computed and then discarded.
    table = AlignmentTable(collation, graph, layout, ranking)
    if collation.pretokenized and not segmentation:
        # Only done with segmentation=False; get_tokenized_at could behave
        # differently with segmentation=True.
        token_list = [[tk.token_data for tk in witness.tokens()] for witness in collation.witnesses]
        table = get_tokenized_at(table, token_list, segmentation=segmentation, layout=layout)
        # For display purposes, "table" and "html" output keep only the token's
        # "t" string rather than the full token_data dict.
        # NOTE(review): placed inside the pretokenized branch because the
        # original nesting is not recoverable from the source - confirm.
        if output == "table" or output == "html":
            for row in table.rows:
                row.cells = [cell["t"] for cell in row.cells]

    # Table-level outputs.
    if output == "json":
        return export_alignment_table_as_json(table, layout=layout)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, indent=False):
    """Align ``collation`` and render the result as ``output``.

    Args:
        collation: a collation object, or a dict with a "witnesses" sequence
            whose items are fed to ``Collation.add_witness``. Not validated.
        output: "svg", "svg_simple", "graph", "json", "html", "html2",
            "table", "xml", "tei", "csv" or "tsv".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when true, ``join`` is applied to the graph and the
            ranking is recomputed. Incompatible with ``near_match``.
        near_match: when true, ``perform_near_match`` post-processes the graph
            and ranking. Both aligners are always constructed with
            ``near_match=False``.
        astar: selects ``ExperimentalAstarAligner`` over ``EditGraphAligner``.
        detect_transpositions: used by ``EditGraphAligner`` only.
        debug_scores: forwarded to the selected aligner.
        properties_filter: used by ``EditGraphAligner`` only.
        indent: forwarded to the TEI exporter.

    Raises:
        SegmentationError: when ``near_match`` and ``segmentation`` are both
            true (raised only after the initial alignment has run).
        Exception: for an unsupported ``output`` name.
    """
    # Accept a JSON-style dict by turning it into a real Collation.
    if isinstance(collation, dict):
        source = collation
        collation = Collation()
        for entry in source["witnesses"]:
            collation.add_witness(entry)

    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    # At this point the graph has no near-match edges.
    result_graph = VariantGraph()
    algorithm.collate(result_graph)
    result_ranking = VariantGraphRanking.of(result_graph)

    if near_match:
        # Segmentation is not supported for near matching.
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        result_ranking = perform_near_match(result_graph, result_ranking)
    elif segmentation:
        # Joining parallel segments alters the graph, so it is ranked again.
        join(result_graph)
        result_ranking = VariantGraphRanking.of(result_graph)

    # Outputs rendered straight from the graph.
    if output == "graph":
        return result_graph
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(result_graph, output)

    # Outputs rendered from the alignment table, which is built even when the
    # output name later turns out to be unknown.
    result_table = AlignmentTable(collation, result_graph, layout, result_ranking)
    if output == "table":
        return result_table
    if output == "json":
        return export_alignment_table_as_json(result_table)
    if output == "html":
        return display_alignment_table_as_html(result_table)
    if output == "html2":
        return visualize_table_vertically_with_colors(result_table, collation)
    if output == "xml":
        return export_alignment_table_as_xml(result_table)
    if output == "tei":
        return export_alignment_table_as_tei(result_table, indent)
    if output in ("csv", "tsv"):
        return display_alignment_table_as_csv(result_table, output)
    raise Exception("Unknown output type: " + output)