def collate(collation, output="table", layout="horizontal", segmentation=True,
            near_match=False, astar=False, detect_transpositions=False,
            debug_scores=False, properties_filter=None, indent=False):
    """Collate the witnesses of *collation* and render the result.

    *collation* may be a Collation object or a JSON-style dict with a
    "witnesses" list; in the latter case a real Collation is built from it.
    *output* selects the rendering: "table" (default), "graph", "svg",
    "svg_simple", "json", "html", "html2", "xml", "tei", "csv" or "tsv".
    Raises SegmentationError when near_match is requested together with
    segmentation, and Exception for an unknown output type.
    """
    # Accept a JSON-style dict and promote it to a real Collation.
    if isinstance(collation, dict):
        real_collation = Collation()
        for witness in collation["witnesses"]:
            real_collation.add_witness(witness)
        collation = real_collation
    # Pick the alignment algorithm; no error trapping beyond this point.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False,
                                             debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation, near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    # Build and rank the variant graph.
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Near matching only works on an unsegmented graph.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)
    if segmentation:
        # Join parallel segments, then re-rank the joined graph.
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # Graph-shaped outputs need no alignment table.
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph
    # Table-shaped outputs.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output in ("csv", "tsv"):
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
def display_variant_graph_as_svg(graph, output):
    """Render *graph* as an SVG via pygraphviz and show it inline (IPython).

    With output == "svg_simple" each node shows only its label; otherwise
    each node is drawn as a Graphviz HTML-like table listing the exact rank
    and the readings per witness.
    """
    a = pygraphviz.AGraph(directed=True, rankdir='LR')
    counter = 0
    mapping = {}
    ranking = VariantGraphRanking.of(graph)
    # add nodes
    for n in graph.graph.nodes():
        counter += 1
        mapping[n] = counter
        if output == "svg_simple":
            label = n.label
            if label == '':
                label = '#'  # placeholder so empty-label nodes stay visible
            a.add_node(mapping[n], label=label)
        else:
            rank = ranking.byVertex[n]
            readings = ["<TR><TD ALIGN='LEFT'><B>" + n.label +
                        "</B></TD><TD ALIGN='LEFT'>exact: " + str(rank) + "</TD></TR>"]
            reverse_dict = defaultdict(list)
            for key, value in n.tokens.items():
                # BUG FIX: '<' and '>' must be escaped as HTML entities inside
                # Graphviz HTML-like labels; the previous substitutions
                # replaced each character with itself (no-ops).
                reverse_dict["".join(
                    re.sub(r'>', r'&gt;', re.sub(r'<', r'&lt;', item.token_data["t"]))
                    for item in value)].append(key)
            for key, value in sorted(reverse_dict.items()):
                reading = ("<TR><TD ALIGN='LEFT'><FONT FACE='Bukyvede'>{}</FONT>"
                           "</TD><TD ALIGN='LEFT'>{}</TD></TR>").format(key, ', '.join(value))
                readings.append(reading)
            a.add_node(mapping[n], label='<<TABLE CELLSPACING="0">' + "".join(readings) + '</TABLE>>')
    # add edges
    for u, v, edge_data in graph.graph.edges_iter(data=True):
        # BUG FIX: AGraph.add_edge's third positional argument is the edge
        # *key*, not its label; pass the label as a keyword attribute so it
        # is actually rendered.
        a.add_edge(mapping[u], mapping[v], label=edge_data["label"])
    # keep vertices of equal rank on the same vertical line
    for key, value in ranking.byRank.items():
        a.add_subgraph([mapping[item] for item in value], rank='same')
    svg = a.draw(prog='dot', format='svg')
    # display using the IPython SVG module
    return display(SVG(svg))
def perform_near_match(graph, ranking):
    """Add near-match edges to *graph*, walking its ranking in reverse.

    For every vertex whose incoming edge spans more than one rank (and whose
    source has no outgoing edge landing exactly one rank ahead), the source is
    compared against all vertices in the intervening ranks by Levenshtein
    ratio and connected to the closest match.  Returns a freshly computed
    VariantGraphRanking for the updated graph.
    """
    for vertex in reversed(list(topological_sort(graph.graph))):
        for source, _target, _edgedata in graph.in_edges(vertex, data=True):
            rank_gap = ranking.byVertex[vertex] - ranking.byVertex[source]
            # A source may move only when (1) it sits more than one rank away
            # and (2) none of its outgoing edges lands exactly one rank ahead.
            out_gaps = [ranking.byVertex[tgt] - ranking.byVertex[src]
                        for (src, tgt) in graph.out_edges(source)]
            if rank_gap > 1 and 1 not in out_gaps:
                lo = ranking.byVertex[source]
                hi = ranking.byVertex[vertex]
                candidates = [item
                              for item in flatten([ranking.byRank[r] for r in range(lo, hi)])
                              if item is not source]
                by_ratio = defaultdict(list)
                for candidate in candidates:
                    by_ratio[Levenshtein.ratio(str(source), str(candidate))].append(candidate)
                weight = max(by_ratio)
                winner = by_ratio[weight][0]
                graph.connect_near(winner, source, weight)
                # Re-rank so the next iteration sees the updated graph.
                ranking = VariantGraphRanking.of(graph)
    # Fresh ranking table, passed along to creation of the alignment table.
    return VariantGraphRanking.of(graph)
def collate(collation, output="table", layout="horizontal", segmentation=True,
            near_match=False, astar=False, detect_transpositions=False,
            debug_scores=False, properties_filter=None, svg_output=None,
            indent=False, scheduler=None):
    """Collate the witnesses of *collation* and render the requested output.

    *collation* may be a Collation or a JSON-style dict with a "witnesses"
    list.  *scheduler* defaults to a fresh Scheduler per call.  Raises
    SegmentationError when near_match is combined with segmentation, and
    Exception for an unknown output type.
    """
    # BUG FIX: the signature previously used ``scheduler=Scheduler()``, which
    # is evaluated once at definition time and silently shares one Scheduler
    # across every call (mutable-default pitfall).  A fresh instance per call
    # keeps invocations independent; passing a scheduler explicitly still works.
    if scheduler is None:
        scheduler = Scheduler()
    # collation may be collation or json; if it's the latter, build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation
    # assume collation is a Collation by now; no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation, near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False,
                                             debug_scores=debug_scores)
    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; reject it explicitly.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        highestRank = ranking.byVertex[graph.end]
        witnessCount = len(collation.witnesses)
        # do-while loop to avoid looping through ranking while modifying it
        rank = highestRank - 1
        condition = True
        while condition:
            rank = process_rank(scheduler, rank, collation, ranking, witnessCount)
            rank -= 1
            condition = rank > 0
    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # graph-shaped outputs need no alignment table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    else:
        raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True,
            near_match=False, astar=False, detect_transpositions=False,
            debug_scores=False, properties_filter=None, indent=False):
    """Collate witnesses and render the requested output format.

    Accepts either a Collation instance or a JSON-style dict with a
    "witnesses" list.  Pre-tokenized collations (with segmentation=False)
    get their table cells rebuilt from the full token data.
    """
    # Promote a JSON-style dict to a real Collation.
    if isinstance(collation, dict):
        real_collation = Collation()
        for witness in collation["witnesses"]:
            real_collation.add_witness(witness)
        collation = real_collation
    # Choose the aligner; no further error trapping.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False,
                                             debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation, near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    # Build and rank the variant graph.
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Near matching requires segmentation to be off.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)
    if segmentation:
        # Join parallel segments, then re-rank.
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph
    # Build the alignment table.
    table = AlignmentTable(collation, graph, layout)
    if collation.pretokenized and not segmentation:
        token_list = [[tk.token_data for tk in witness.tokens()]
                      for witness in collation.witnesses]
        # Only with segmentation=False; get_tokenized_at could behave
        # differently if segmentation were True.
        table = get_tokenized_at(table, token_list,
                                 segmentation=segmentation, layout=layout)
        if output in ("table", "html"):
            # For display, keep only the token string 't' rather than the
            # full token_data dict.
            for row in table.rows:
                row.cells = [cell["t"] for cell in row.cells]
    if output == "json":
        return export_alignment_table_as_json(table, layout=layout)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output in ("csv", "tsv"):
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True,
            near_match=False, astar=False, detect_transpositions=False,
            debug_scores=False, properties_filter=None, svg_output=None,
            indent=False, scheduler=None):
    """Collate the witnesses of *collation* and render the requested output.

    *collation* may be a Collation or a JSON-style dict with a "witnesses"
    list.  *scheduler* defaults to a fresh Scheduler per call.  Raises
    SegmentationError when near_match is combined with segmentation, and
    Exception for an unknown output type.
    """
    # BUG FIX: ``scheduler=Scheduler()`` as the default was evaluated once at
    # definition time, so all calls shared one Scheduler (mutable-default
    # pitfall).  Create a fresh Scheduler per call; explicit schedulers passed
    # by callers behave exactly as before.
    if scheduler is None:
        scheduler = Scheduler()
    # collation may be collation or json; if it's the latter, build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation
    # assume collation is a Collation by now; no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation, near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False,
                                             debug_scores=debug_scores)
    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        highestRank = ranking.byVertex[graph.end]
        witnessCount = len(collation.witnesses)
        # do-while loop to avoid looping through ranking while modifying it
        rank = highestRank - 1
        condition = True
        while condition:
            rank = process_rank(scheduler, rank, collation, ranking, witnessCount)
            rank -= 1
            condition = rank > 0
    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    else:
        raise Exception("Unknown output type: " + output)
def _rank_the_graph(self, phrase_matches, base):
    """Compute and return a fresh VariantGraphRanking for *base*.

    NOTE(review): *phrase_matches* is currently unused — see the TODO below;
    it is kept in the signature for the intended future refinement.
    """
    # TODO: rank the graph based on only the first vertex of each of the phrasematches!
    return VariantGraphRanking.of(base)
def collate(collation, output="table", layout="horizontal", segmentation=True,
            near_match=False, astar=False, detect_transpositions=False,
            debug_scores=False, properties_filter=None, indent=False):
    """Align the witnesses in *collation* and emit the chosen *output* form.

    A JSON-style dict (with a "witnesses" list) is converted into a real
    Collation first.  Raises SegmentationError if near_match is combined with
    segmentation, and Exception for an unrecognised output type.
    """
    # Convert a JSON-style dict into a real Collation.
    if isinstance(collation, dict):
        built = Collation()
        for witness in collation["witnesses"]:
            built.add_witness(witness)
        collation = built
    # Select the aligner implementation; no error trapping from here on.
    algorithm = (ExperimentalAstarAligner(collation, near_match=False,
                                          debug_scores=debug_scores)
                 if astar else
                 EditGraphAligner(collation, near_match=False,
                                  detect_transpositions=detect_transpositions,
                                  debug_scores=debug_scores,
                                  properties_filter=properties_filter))
    # Build the variant graph and its ranking.
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Near matching is only defined on an unsegmented graph.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # Graph outputs short-circuit before the alignment table is built.
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    elif output == "html":
        return display_alignment_table_as_html(table)
    elif output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    elif output == "table":
        return table
    elif output == "xml":
        return export_alignment_table_as_xml(table)
    elif output == "tei":
        return export_alignment_table_as_tei(table, indent)
    elif output in ("csv", "tsv"):
        return display_alignment_table_as_csv(table, output)
    else:
        raise Exception("Unknown output type: " + output)
def perform_near_match(graph, ranking):
    """Insert near-match edges into *graph* and return an updated ranking.

    Vertices are visited in reverse topological order.  Whenever an incoming
    edge skips over at least one rank and its source has no outgoing edge to
    the immediately following rank, the source is compared (Levenshtein
    ratio) with every vertex in the skipped ranks and linked to the best one.
    """
    ordering = reversed(list(topological_sort(graph.graph)))
    for node in ordering:
        for origin, _dst, _data in graph.in_edges(node, data=True):
            node_rank = ranking.byVertex[node]
            origin_rank = ranking.byVertex[origin]
            if node_rank - origin_rank <= 1:
                continue  # origin already sits directly before node
            # Skip origins that already have an out-edge exactly one rank ahead.
            if any(ranking.byVertex[b] - ranking.byVertex[a] == 1
                   for (a, b) in graph.out_edges(origin)):
                continue
            pool = [c for c in flatten([ranking.byRank[r]
                                        for r in range(origin_rank, node_rank)])
                    if c is not origin]
            scored = defaultdict(list)
            for contender in pool:
                scored[Levenshtein.ratio(str(origin), str(contender))].append(contender)
            best = max(scored)
            graph.connect_near(scored[best][0], origin, best)
            # Update the ranking table for the next pass through the loop.
            ranking = VariantGraphRanking.of(graph)
    # Fresh ranking table (passed along to creation of the alignment table).
    return VariantGraphRanking.of(graph)
def display_variant_graph_as_svg(graph, output):
    """Render *graph* (plus its near-match edges) as SVG and display it.

    Uses the ``graphviz`` package.  output == "svg_simple" draws plain node
    labels; anything else draws each node as an HTML-like table of readings
    with their exact rank.
    """
    a = graphviz.Digraph(format="svg", graph_attr={'rankdir': 'LR'})
    counter = 0
    mapping = {}
    ranking = VariantGraphRanking.of(graph)
    # add nodes
    for n in graph.graph.nodes():
        counter += 1
        mapping[n] = str(counter)  # graphviz node ids must be strings
        if output == "svg_simple":
            label = n.label
            if label == '':
                label = '#'  # placeholder for empty labels
            a.node(mapping[n], label=label)
        else:
            rank = ranking.byVertex[n]
            readings = [
                "<TR><TD ALIGN='LEFT'><B>" + n.label +
                "</B></TD><TD ALIGN='LEFT'>exact: " + str(rank) + "</TD></TR>"
            ]
            reverse_dict = defaultdict(list)
            for key, value in n.tokens.items():
                # BUG FIX: '<' and '>' must become HTML entities inside
                # Graphviz HTML-like labels; the previous substitutions
                # replaced each character with itself (no-ops).
                reverse_dict["".join(
                    re.sub(r'>', r'&gt;', re.sub(r'<', r'&lt;', item.token_data["t"]))
                    for item in value)].append(key)
            for key, value in sorted(reverse_dict.items()):
                reading = (
                    "<TR><TD ALIGN='LEFT'><FONT FACE='Bukyvede'>{}</FONT></TD><TD ALIGN='LEFT'>{}</TD></TR>"
                ).format(key, ', '.join(value))
                readings.append(reading)
            a.node(mapping[n], label='<<TABLE CELLSPACING="0">' + "".join(readings) + '</TABLE>>')
    # add regular (token sequence) edges
    for u, v, edgedata in graph.graph.edges(data=True):
        a.edge(mapping[u], mapping[v], label=edgedata['label'])
    # add near-match edges
    # TODO: Show all near edges (currently), or just the top one?
    for u, v, edgedata in graph.near_graph.edges(data=True):
        a.edge(mapping[u], mapping[v], style='dashed',
               label=str('{:3.2f}'.format(edgedata['weight'])))
    # Add rank='same' information to keep vertices of a rank aligned.
    for key, value in ranking.byRank.items():
        tmp = graphviz.Digraph(graph_attr={'rank': 'same'})
        for name in [mapping[item] for item in value]:
            tmp.node(name)
        a.subgraph(tmp)
    # render() writes the SVG file and returns its path; IPython's SVG
    # object accepts a path for inline display.
    svg = a.render()
    return display(SVG(svg))
def collate(self, graph):
    """Collate all witnesses of self.collation into *graph*.

    :type graph: VariantGraph

    The first witness seeds the graph (one vertex per token).  Every further
    witness is aligned against the current graph via a table filled by
    fill_needleman_wunsch_table over the graph's ranks, then merged in.  When
    self.detect_transpositions is set, transposition detection runs after all
    witnesses are merged.
    """
    # prepare the token index
    self.token_index.prepare()
    self.vertex_array = [None] * len(self.token_index.token_array)
    # Build the variant graph for the first witness
    # this is easy: generate a vertex for every token
    first_witness = self.collation.witnesses[0]
    tokens = first_witness.tokens()
    token_to_vertex = self.merge(graph, first_witness.sigil, tokens)
    self.update_token_position_to_vertex(token_to_vertex)
    self.update_token_to_vertex_array(tokens, first_witness, self.token_position_to_vertex)
    # align witness 2 - n
    for x in range(1, len(self.collation.witnesses)):
        witness = self.collation.witnesses[x]
        tokens = witness.tokens()
        # re-rank the graph as it stands before adding this witness
        variant_graph_ranking = VariantGraphRanking.of(graph)
        variant_graph_ranks = list(set(map(lambda v: variant_graph_ranking.byVertex.get(v), graph.vertices())))
        # we leave in the rank of the start vertex, but remove the rank of the end vertex
        # NOTE(review): pop() removes the *last* element of list(set(...));
        # this relies on the set of int ranks iterating in ascending order
        # (a CPython int-hashing detail) — TODO confirm.
        variant_graph_ranks.pop()
        # now the vertical stuff
        tokens_as_index_list = self.as_index_list(tokens)
        match_cube = MatchCube(self.token_index, witness, self.vertex_array, variant_graph_ranking, self.properties_filter)
        self.fill_needleman_wunsch_table(variant_graph_ranks, tokens_as_index_list, match_cube)
        aligned = self.align_matching_tokens(match_cube)
        # merge the aligned witness into the graph and extend the
        # token-to-vertex mapping with its newly generated vertices
        witness_token_to_generated_vertex = self.merge(graph, witness.sigil, witness.tokens(), aligned)
        token_to_vertex.update(witness_token_to_generated_vertex)
        self.update_token_position_to_vertex(token_to_vertex, aligned)
        # restrict the position-to-vertex map to this witness's token range
        witness_token_position_to_vertex = {}
        for p in self.token_index.get_range_for_witness(witness.sigil):
            witness_token_position_to_vertex[p] = self.token_position_to_vertex[p]
        self.update_token_to_vertex_array(tokens, witness, witness_token_position_to_vertex)
    # change superbase
    # superbase = self.new_superbase
    if self.detect_transpositions:
        detector = TranspositionDetection(self)
        detector.detect()