示例#1
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None):
    """Align the witnesses in ``collation`` and return the result as ``output``.

    An ``ExperimentalAstarAligner`` is used when ``astar`` is truthy, otherwise
    an ``EditGraphAligner``; ``detect_transpositions`` and ``properties_filter``
    are only forwarded to the latter.  The aligner fills a fresh
    ``VariantGraph`` and, when ``segmentation`` is truthy, the graph is passed
    through ``join``.

    ``output`` selects what is returned:
      * ``"svg"``   -- result of ``display_variant_graph_as_SVG(graph)``
      * ``"graph"`` -- the variant graph itself
      * ``"json"``, ``"html"``, ``"html2"`` -- the alignment table handed to
        the matching export/display helper
      * ``"table"`` -- the ``AlignmentTable`` itself (default)

    Raises:
        Exception: if ``output`` is none of the values above.
    """
    # select the alignment strategy
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=near_match, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(collation, near_match=near_match, detect_transpositions=detect_transpositions, debug_scores=debug_scores, properties_filter=properties_filter)

    # build the variant graph, optionally joining parallel segments
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    # graph-level outputs do not need an alignment table
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    # every remaining output is derived from the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        result = export_alignment_table_as_json(alignment_table)
    elif output == "html":
        result = display_alignment_table_as_HTML(alignment_table)
    elif output == "html2":
        result = visualizeTableVerticallyWithColors(alignment_table, collation)
    elif output == "table":
        result = alignment_table
    else:
        raise Exception("Unknown output type: "+output)
    return result
 def test_superbase_generation_multiple_short_witnesses(self):
     """Smoke test: collating three one-token witnesses must not raise.

     No assertion is made on the resulting graph.
     """
     collation = Collation()
     for sigil, content in (("A", "a"), ("B", "b"), ("C", "c")):
         collation.add_plain_witness(sigil, content)
     aligner = EditGraphAligner(collation)
     variant_graph = VariantGraph()
     aligner.collate(variant_graph)
 def test_superbase_generation_multiple_short_witnesses(self):
     """Run the edit-graph aligner over witnesses "a", "b" and "c".

     The test only verifies that collation completes; nothing is asserted.
     """
     witnesses = [("A", "a"), ("B", "b"), ("C", "c")]
     collation = Collation()
     for sigil, text in witnesses:
         collation.add_plain_witness(sigil, text)
     aligner = EditGraphAligner(collation)
     target_graph = VariantGraph()
     aligner.collate(target_graph)
示例#4
0
 def test_superbase(self):
     """Check ``aligner.new_superbase`` after collating two longer witnesses."""
     witness_texts = (
         ("A", "X a b c d e f X g h i Y Z j k"),
         ("B", "a b c Y d e f Y Z g h i X j k"),
     )
     collation = Collation()
     for sigil, text in witness_texts:
         collation.add_plain_witness(sigil, text)
     aligner = EditGraphAligner(collation)
     variant_graph = VariantGraph()
     aligner.collate(variant_graph, collation)
     self.assertSuperbaseEquals(
         "X a b c Y d e f X Y Z g h i Y Z X j k",
         aligner.new_superbase)
示例#5
0
 def test_superbase(self):
     """Collate witnesses A and B, then compare the aligner's superbase
     with the expected token sequence."""
     expected_superbase = "X a b c Y d e f X Y Z g h i Y Z X j k"
     collation = Collation()
     for sigil, content in [("A", "X a b c d e f X g h i Y Z j k"),
                            ("B", "a b c Y d e f Y Z g h i X j k")]:
         collation.add_plain_witness(sigil, content)
     aligner = EditGraphAligner(collation)
     target_graph = VariantGraph()
     aligner.collate(target_graph, collation)
     actual_superbase = aligner.new_superbase
     self.assertSuperbaseEquals(expected_superbase, actual_superbase)
示例#6
0
 def test_superbase_generation_multiple_short_witnesses(self):
     """Three one-token witnesses "a", "b", "c" must give superbase "a b c"."""
     collation = Collation()
     for sigil, content in (("A", "a"), ("B", "b"), ("C", "c")):
         collation.add_plain_witness(sigil, content)
     aligner = EditGraphAligner(collation)
     variant_graph = VariantGraph()
     aligner.collate(variant_graph, collation)
     self.assertSuperbaseEquals("a b c", aligner.new_superbase)
 def test_superbase_generation_multiple_short_witnesses(self):
     """Collate witnesses A="a", B="b", C="c" and check the superbase
     exposed by the aligner."""
     expected_superbase = "a b c"
     witnesses = [("A", "a"), ("B", "b"), ("C", "c")]
     collation = Collation()
     for sigil, text in witnesses:
         collation.add_plain_witness(sigil, text)
     aligner = EditGraphAligner(collation)
     target_graph = VariantGraph()
     aligner.collate(target_graph, collation)
     actual_superbase = aligner.new_superbase
     self.assertSuperbaseEquals(expected_superbase, actual_superbase)
示例#8
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, indent=False):
    """Collate witnesses and return the alignment in the requested format.

    ``collation`` is either a collation object or a dict with a
    ``"witnesses"`` list; a dict is first converted by feeding every entry to
    ``Collation.add_witness``.  No further validation of the input is done.

    The aligner (``ExperimentalAstarAligner`` when ``astar`` is truthy, else
    ``EditGraphAligner``) is always created with ``near_match=False``; when
    the ``near_match`` argument is truthy, ``perform_near_match`` is applied
    to the finished graph and its ranking afterwards.

    ``output`` may be ``"svg"``, ``"svg_simple"``, ``"graph"``, ``"json"``,
    ``"html"``, ``"html2"``, ``"table"`` (default), ``"xml"``, ``"tei"``
    (uses ``indent``), ``"csv"`` or ``"tsv"``.

    Raises:
        SegmentationError: ``near_match`` and ``segmentation`` are both truthy.
        Exception: ``output`` is not one of the supported values.
    """
    # turn a JSON-style dict into a real Collation
    if isinstance(collation, dict):
        witness_specs = collation["witnesses"]
        collation = Collation()
        for witness_spec in witness_specs:
            collation.add_witness(witness_spec)

    # near matching is never delegated to the aligner itself
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions,
                                   debug_scores=debug_scores, properties_filter=properties_filter)

    variant_graph = VariantGraph()
    aligner.collate(variant_graph)
    ranking = VariantGraphRanking.of(variant_graph)

    if near_match:
        # near matching works on the unsegmented graph and its ranking
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(variant_graph, ranking)

    if segmentation:
        # joining parallel segments changes the graph, so rank it again
        join(variant_graph)
        ranking = VariantGraphRanking.of(variant_graph)

    # graph-level outputs
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(variant_graph, output)
    elif output == "graph":
        return variant_graph

    # table-level outputs
    alignment_table = AlignmentTable(collation, variant_graph, layout, ranking)
    if output == "json":
        result = export_alignment_table_as_json(alignment_table)
    elif output == "html":
        result = display_alignment_table_as_html(alignment_table)
    elif output == "html2":
        result = visualize_table_vertically_with_colors(alignment_table, collation)
    elif output == "table":
        result = alignment_table
    elif output == "xml":
        result = export_alignment_table_as_xml(alignment_table)
    elif output == "tei":
        result = export_alignment_table_as_tei(alignment_table, indent)
    elif output in ("csv", "tsv"):
        result = display_alignment_table_as_csv(alignment_table, output)
    else:
        raise Exception("Unknown output type: " + output)
    return result
    def testOmission2GlobalScore(self):
        """Collate "a a b c" against "a b c" and check the first four rows
        of ``aligner.table`` via ``assertRow``."""
        collation = Collation()
        for sigil, content in (("A", "a a b c"), ("B", "a b c")):
            collation.add_plain_witness(sigil, content)
        aligner = EditGraphAligner(collation)
        variant_graph = VariantGraph()
        aligner.collate(variant_graph)
        score_table = aligner.table

        expected_rows = (
            [0, -1, -2, -3, -4],
            [-1, 0, -1, -2, -3],
            [-2, -1, -2, -1, -2],
            [-3, -2, -3, -2, -1],
        )
        for row_index, expected_row in enumerate(expected_rows):
            self.assertRow(expected_row, score_table[row_index])
示例#10
0
    def testOmission2GlobalScore(self):
        """Check rows 0-3 of ``aligner.table`` after collating witness
        A ("a a b c") with witness B ("a b c")."""
        collation = Collation()
        for sigil, text in [("A", "a a b c"), ("B", "a b c")]:
            collation.add_plain_witness(sigil, text)
        aligner = EditGraphAligner(collation)
        target_graph = VariantGraph()
        aligner.collate(target_graph, collation)
        table = aligner.table

        expected_by_row = [
            [0, -1, -2, -3, -4],
            [-1, 0, -1, -2, -3],
            [-2, -1, -2, -1, -2],
            [-3, -2, -3, -2, -1],
        ]
        for index, expected in enumerate(expected_by_row):
            self.assertRow(expected, table[index])
示例#11
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            debug_scores=False):
    """Align the witnesses of ``collation`` with ``EditGraphAligner``.

    The aligner fills a fresh ``VariantGraph``; the graph is passed through
    ``join`` when ``segmentation`` is truthy.

    ``output`` selects the return value: ``"graph"`` (the variant graph),
    ``"json"``, ``"html"`` or ``"table"`` (default).

    When ``collation.pretokenized`` is truthy and ``segmentation`` is falsy,
    the alignment table is rebuilt by ``get_tokenized_at`` from each witness
    token's ``token_data``.  For the ``"table"`` and ``"html"`` outputs every
    cell is then replaced by its ``"t"`` entry.

    Raises:
        Exception: ``output`` is not one of the supported values.
    """
    aligner = EditGraphAligner(collation,
                               near_match=near_match,
                               astar=astar,
                               debug_scores=debug_scores)

    # build the variant graph and optionally join parallel segments
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    if output == "graph":
        return variant_graph

    alignment_table = AlignmentTable(collation, variant_graph, layout)

    # Pretokenized input is only post-processed when segmentation is off;
    # get_tokenized_at might behave differently with segmentation=True.
    if collation.pretokenized and not segmentation:
        token_data_per_witness = []
        for witness in collation.witnesses:
            token_data_per_witness.append(
                [token.token_data for token in witness.tokens()])
        alignment_table = get_tokenized_at(alignment_table,
                                           token_data_per_witness,
                                           segmentation=segmentation,
                                           layout=layout)
        # For display, "table" and "html" keep only the 't' value of each
        # cell rather than the complete token_data dict.
        if output in ("table", "html"):
            for row in alignment_table.rows:
                row.cells = [cell["t"] for cell in row.cells]

    if output == "json":
        result = export_alignment_table_as_json(alignment_table, layout=layout)
    elif output == "html":
        result = display_alignment_table_as_HTML(alignment_table)
    elif output == "table":
        result = alignment_table
    else:
        raise Exception("Unknown output type: " + output)
    return result
示例#12
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None):
    """Collate the witnesses in ``collation`` and return them as ``output``.

    With ``astar`` truthy an ``ExperimentalAstarAligner`` does the alignment;
    otherwise an ``EditGraphAligner`` is used, which additionally receives
    ``detect_transpositions`` and ``properties_filter``.  The resulting
    ``VariantGraph`` is passed through ``join`` when ``segmentation`` is
    truthy.

    Supported ``output`` values: ``"svg"``, ``"graph"``, ``"json"``,
    ``"html"``, ``"html2"`` and ``"table"`` (default).

    Raises:
        Exception: ``output`` is not one of the supported values.
    """
    if astar:
        aligner = ExperimentalAstarAligner(collation,
                                           near_match=near_match,
                                           debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=near_match,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)

    # build the graph, then join parallel segments if requested
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    # outputs that are produced straight from the graph
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    # outputs that need the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        rendered = export_alignment_table_as_json(alignment_table)
    elif output == "html":
        rendered = display_alignment_table_as_HTML(alignment_table)
    elif output == "html2":
        rendered = visualizeTableVerticallyWithColors(alignment_table,
                                                      collation)
    elif output == "table":
        rendered = alignment_table
    else:
        raise Exception("Unknown output type: " + output)
    return rendered
示例#13
0
    def testOmission(self):
        """Collate "a b c" against "b c" and check the ``g`` value of every
        cell in the first three rows / four columns of ``aligner.table``."""
        collation = Collation()
        for sigil, content in (("A", "a b c"), ("B", "b c")):
            collation.add_plain_witness(sigil, content)
        aligner = EditGraphAligner(collation)
        variant_graph = VariantGraph()
        aligner.collate(variant_graph, collation)
        table = aligner.table
        # self.debug_table(aligner, table)
        expected_g = (
            (0, -1, -2, -3),
            (-1, -2, -1, -2),
            (-2, -3, -2, -1),
        )
        for row_index, expected_row in enumerate(expected_g):
            for column_index, expected in enumerate(expected_row):
                self.assertEqual(expected, table[row_index][column_index].g)
示例#14
0
 def testOmission(self):
     """Check the ``g`` attribute of the 3x4 leading cells of
     ``aligner.table`` after collating "a b c" with "b c"."""
     collation = Collation()
     for sigil, text in [("A", "a b c"), ("B", "b c")]:
         collation.add_plain_witness(sigil, text)
     aligner = EditGraphAligner(collation)
     target_graph = VariantGraph()
     aligner.collate(target_graph)
     table = aligner.table
     # self.debug_table(aligner, table)
     expected_scores = [
         [0, -1, -2, -3],
         [-1, -2, -1, -2],
         [-2, -3, -2, -1],
     ]
     for y, expected_row in enumerate(expected_scores):
         for x, expected_value in enumerate(expected_row):
             self.assertEqual(expected_value, table[y][x].g)
示例#15
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False, debug_scores=False):
    """Align the witnesses of ``collation`` and return the result as ``output``.

    A ``Scorer`` built from ``collation`` and ``near_match`` is handed to
    ``EditGraphAligner`` together with ``astar`` and ``debug_scores``.  The
    aligner fills a fresh ``VariantGraph``, which is passed through ``join``
    when ``segmentation`` is truthy.

    Supported ``output`` values: ``"svg"``, ``"graph"``, ``"json"``,
    ``"html"`` and ``"table"`` (default).

    Raises:
        Exception: ``output`` is not one of the supported values.
    """
    scoring = Scorer(collation, near_match)
    aligner = EditGraphAligner(collation, scoring, astar=astar, debug_scores=debug_scores)

    # build the variant graph and optionally join parallel segments
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        join(variant_graph)

    # outputs produced directly from the graph
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    # outputs produced from the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        result = export_alignment_table_as_json(alignment_table)
    elif output == "html":
        result = display_alignment_table_as_HTML(alignment_table)
    elif output == "table":
        result = alignment_table
    else:
        raise Exception("Unknown output type: "+output)
    return result
示例#16
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            svg_output=None,
            indent=False,
            scheduler=None):
    """Collate witnesses and return the alignment in the requested format.

    Args:
        collation: a collation object, or a dict with a ``"witnesses"`` list
            whose entries are fed to ``Collation.add_witness``.  The input is
            not validated any further.
        output: ``"svg"``, ``"svg_simple"``, ``"graph"``, ``"json"``,
            ``"html"``, ``"html2"``, ``"table"`` (default), ``"xml"`` or
            ``"tei"``.
        layout: forwarded to ``AlignmentTable``.
        segmentation: when truthy, the graph is passed through ``join`` and
            ranked again.
        near_match: when truthy, ``process_rank`` is run over the ranks of
            the finished graph.  The aligner itself is always created with
            ``near_match=False``.
        astar: use ``ExperimentalAstarAligner`` instead of
            ``EditGraphAligner``.
        detect_transpositions, properties_filter: forwarded to
            ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        svg_output: forwarded to ``display_variant_graph_as_SVG``.
        indent: forwarded to ``export_alignment_table_as_tei``.
        scheduler: object handed to ``process_rank``.  ``None`` (default)
            creates a new ``Scheduler`` for this call, so separate calls never
            share one instance through the default argument.

    Raises:
        SegmentationError: ``near_match`` and ``segmentation`` are both truthy.
        Exception: ``output`` is not one of the supported values.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')

        # A default evaluated in the signature would be created once and
        # reused by every call; build the scheduler per call instead.
        if scheduler is None:
            scheduler = Scheduler()

        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank returns the rank to continue from.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking,
                                witness_count)
            rank -= 1
            if rank <= 0:
                break

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
示例#17
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False, scheduler=None):
    """Collate witnesses and return the alignment in the requested format.

    ``collation`` is either a collation object or a dict with a
    ``"witnesses"`` list (converted through ``Collation.add_witness``); it is
    not validated any further.

    The aligner (``ExperimentalAstarAligner`` when ``astar`` is truthy, else
    ``EditGraphAligner``) is always created with ``near_match=False``.  When
    the ``near_match`` argument is truthy, ``process_rank`` is run over the
    ranks of the finished graph using ``scheduler``.

    ``scheduler`` defaults to ``None``, meaning a new ``Scheduler`` is created
    for this call; separate calls therefore never share one instance through
    the default argument.

    ``output`` may be ``"svg"``, ``"svg_simple"`` (both use ``svg_output``),
    ``"graph"``, ``"json"``, ``"html"``, ``"html2"``, ``"table"`` (default),
    ``"xml"`` or ``"tei"`` (uses ``indent``).

    Raises:
        SegmentationError: ``near_match`` and ``segmentation`` are both truthy.
        Exception: ``output`` is not one of the supported values.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions, debug_scores=debug_scores, properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')

        # Create the scheduler per call: a default built in the signature
        # would be evaluated once and shared by all calls.
        if scheduler is None:
            scheduler = Scheduler()

        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank returns the rank to continue from.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if rank <= 0:
                break

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
示例#18
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            indent=False):
    """Collate witnesses and return the alignment in the requested format.

    Args:
        collation: a collation object, or a dict with a ``"witnesses"`` list
            whose entries are fed to ``Collation.add_witness``.  The input is
            not validated any further.
        output: ``"svg"``, ``"svg_simple"``, ``"graph"``, ``"json"``,
            ``"html"``, ``"html2"``, ``"table"`` (default), ``"xml"``,
            ``"tei"``, ``"csv"`` or ``"tsv"``.
        layout: forwarded to ``AlignmentTable``, ``get_tokenized_at`` and the
            JSON export.
        segmentation: when truthy, the graph is passed through ``join`` and
            ranked again.
        near_match: when truthy, ``perform_near_match`` is applied to the
            finished graph and its ranking.  The aligner itself is always
            created with ``near_match=False``.
        astar: use ``ExperimentalAstarAligner`` instead of
            ``EditGraphAligner``.
        detect_transpositions, properties_filter: forwarded to
            ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        indent: forwarded to ``export_alignment_table_as_tei``.

    When ``collation.pretokenized`` is truthy and ``segmentation`` is falsy,
    the table is rebuilt by ``get_tokenized_at`` from each witness token's
    ``token_data``; for the ``"table"`` and ``"html"`` outputs every cell is
    then replaced by its ``"t"`` entry.

    Raises:
        SegmentationError: ``near_match`` and ``segmentation`` are both truthy.
        Exception: ``output`` is not one of the supported values.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        # There is already a graph ('graph', without near-match edges) and ranking ('ranking')
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # Create the alignment table from the ranking computed above; without it
    # the ranking returned by perform_near_match would never reach the table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if collation.pretokenized and not segmentation:
        token_list = [[tk.token_data for tk in witness.tokens()]
                      for witness in collation.witnesses]
        # Only done with segmentation=False: get_tokenized_at might behave
        # differently if segmentation were True.
        table = get_tokenized_at(table,
                                 token_list,
                                 segmentation=segmentation,
                                 layout=layout)
        # for display purpose, table and html output will return only token 't' (string) and not the full token_data (dict)
        if output == "table" or output == "html":
            for row in table.rows:
                row.cells = [cell["t"] for cell in row.cells]

    if output == "json":
        return export_alignment_table_as_json(table, layout=layout)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
示例#19
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            indent=False):
    """Collate witnesses and return the alignment as ``output``.

    ``collation`` may be a collation object or a dict carrying a
    ``"witnesses"`` list; a dict is converted by passing each entry to
    ``Collation.add_witness``.  The input is not validated beyond that.

    Alignment is done by ``ExperimentalAstarAligner`` when ``astar`` is
    truthy and by ``EditGraphAligner`` otherwise; both are created with
    ``near_match=False``.  A truthy ``near_match`` argument instead triggers
    ``perform_near_match`` on the finished graph and its ranking.

    Supported ``output`` values: ``"svg"``, ``"svg_simple"``, ``"graph"``,
    ``"json"``, ``"html"``, ``"html2"``, ``"table"`` (default), ``"xml"``,
    ``"tei"`` (uses ``indent``), ``"csv"`` and ``"tsv"``.

    Raises:
        SegmentationError: ``near_match`` and ``segmentation`` are both truthy.
        Exception: ``output`` is not one of the supported values.
    """
    # a JSON-style dict is turned into a real Collation first
    if isinstance(collation, dict):
        witness_specs = collation["witnesses"]
        collation = Collation()
        for witness_spec in witness_specs:
            collation.add_witness(witness_spec)

    # near matching is never delegated to the aligner itself
    if astar:
        aligner = ExperimentalAstarAligner(collation,
                                           near_match=False,
                                           debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)

    variant_graph = VariantGraph()
    aligner.collate(variant_graph)
    ranking = VariantGraphRanking.of(variant_graph)

    if near_match:
        # near matching starts from the unsegmented graph and its ranking
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        ranking = perform_near_match(variant_graph, ranking)

    if segmentation:
        # joining parallel segments alters the graph, so rank it again
        join(variant_graph)
        ranking = VariantGraphRanking.of(variant_graph)

    # outputs produced straight from the graph
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(variant_graph, output)
    elif output == "graph":
        return variant_graph

    # outputs produced from the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout,
                                     ranking)
    if output == "json":
        rendered = export_alignment_table_as_json(alignment_table)
    elif output == "html":
        rendered = display_alignment_table_as_html(alignment_table)
    elif output == "html2":
        rendered = visualize_table_vertically_with_colors(alignment_table,
                                                          collation)
    elif output == "table":
        rendered = alignment_table
    elif output == "xml":
        rendered = export_alignment_table_as_xml(alignment_table)
    elif output == "tei":
        rendered = export_alignment_table_as_tei(alignment_table, indent)
    elif output in ("csv", "tsv"):
        rendered = display_alignment_table_as_csv(alignment_table, output)
    else:
        raise Exception("Unknown output type: " + output)
    return rendered