示例#1
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, indent=False):
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores, properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        # There is already a graph ('graph', without near-match edges) and ranking ('ranking')
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    else:
        raise Exception("Unknown output type: " + output)
示例#2
0
 def create_from_dict(cls, data, limit=None):
     if "witnesses" not in data:
         raise UnsupportedError("Json input not valid")
     witnesses = data["witnesses"]
     collation = Collation()
     for witness in witnesses[:limit]:
         # generate collation object from json_data
         collation.add_witness(witness)
         # determine if data is pretokenized
         if 'tokens' in witness:
             collation.pretokenized = True
     return collation
示例#3
0
def get_tokenized_at(table,
                     token_list,
                     segmentation=False,
                     layout="horizontal"):
    tokenized_at = AlignmentTable(Collation(), layout=layout)
    for witness_row, witness_tokens in zip(table.rows, token_list):
        new_row = Row(witness_row.header)
        tokenized_at.rows.append(new_row)
        counter = 0
        for cell in witness_row.cells:
            if cell == "-":
                # TODO: should probably be null or None instead, but that would break the rendering at the moment (line 41)
                new_row.cells.append({"t": "-"})
            # if segmentation=False
            else:
                new_row.cells.append(witness_tokens[counter])
                counter += 1
            # else if segmentation=True
            ##token_list must be a list of Token instead of list of dict (update lines 34, 64)
            ##line 41 will not be happy in case of table/html output
            #string = witness_tokens[counter].token_string
            #token_counter = 1
            #while string != cell :
            #    if counter+token_counter-1 < len(witness_tokens)-1:
            #        #add token_string of the next token until it is equivalent to the string in the cell
            #        #if we are not at the last token
            #        string += ' '+witness_tokens[counter+token_counter].token_string
            #        token_counter += 1
            ##there is one list level too many in the output
            #new_row.cells.append([tk.token_data for tk in witness_tokens[counter:counter+token_counter]])
            #counter += token_counter.
    return tokenized_at
示例#4
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            svg_output=None,
            indent=False,
            scheduler=Scheduler()):
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')

        highestRank = ranking.byVertex[graph.end]
        witnessCount = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it
        rank = highestRank - 1
        condition = True
        while condition:
            rank = process_rank(scheduler, rank, collation, ranking,
                                witnessCount)
            rank -= 1
            condition = rank > 0

        # # Verify that nodes have been moved
        # print("\nLabels at each rank at end of processing: ")
        # for rank in ranking.byRank:
        #     print("\nRank: " + str(rank))
        #     print([node.label for node in ranking.byRank[rank]])

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    else:
        raise Exception("Unknown output type: " + output)
示例#5
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            indent=False):
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        # There is already a graph ('graph', without near-match edges) and ranking ('ranking')
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # create alignment table
    table = AlignmentTable(collation, graph, layout)
    if collation.pretokenized and not segmentation:
        token_list = [[tk.token_data for tk in witness.tokens()]
                      for witness in collation.witnesses]
        # only with segmentation=False
        # there could be a different comportment of get_tokenized_table if semgentation=True
        table = get_tokenized_at(table,
                                 token_list,
                                 segmentation=segmentation,
                                 layout=layout)
        # for display purpose, table and html output will return only token 't' (string) and not the full token_data (dict)
        if output == "table" or output == "html":
            for row in table.rows:
                row.cells = [cell["t"] for cell in row.cells]

    if output == "json":
        return export_alignment_table_as_json(table, layout=layout)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    else:
        raise Exception("Unknown output type: " + output)
示例#6
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False, scheduler=Scheduler()):
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions, debug_scores=debug_scores, properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')

        highestRank = ranking.byVertex[graph.end]
        witnessCount = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it
        rank = highestRank - 1
        condition = True
        while condition:
            rank = process_rank(scheduler, rank, collation, ranking, witnessCount)
            rank -= 1
            condition = rank > 0

        # # Verify that nodes have been moved
        # print("\nLabels at each rank at end of processing: ")
        # for rank in ranking.byRank:
        #     print("\nRank: " + str(rank))
        #     print([node.label for node in ranking.byRank[rank]])

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    else:
        raise Exception("Unknown output type: " + output)
示例#7
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            indent=False):
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        # There is already a graph ('graph', without near-match edges) and ranking ('ranking')
        if segmentation:
            raise SegmentationError(
                'segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    else:
        raise Exception("Unknown output type: " + output)