def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False,
            astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None,
            indent=False):
    """Align the witnesses of a collation and return the result in the requested format.

    Args:
        collation: A collation object, or a dict with a ``"witnesses"`` list.
            A dict is converted by adding each witness to a new ``Collation``.
        output: Name of the result format. ``"svg"`` / ``"svg_simple"`` render
            the variant graph, ``"graph"`` returns the graph itself, and
            ``"json"``, ``"html"``, ``"html2"``, ``"table"``, ``"xml"``,
            ``"tei"``, ``"csv"`` and ``"tsv"`` are produced from an
            ``AlignmentTable``.
        layout: Passed through to ``AlignmentTable``.
        segmentation: When true, ``join`` is applied to the graph and the
            ranking is recomputed.
        near_match: When true, the ranking is replaced by the result of
            ``perform_near_match``. Cannot be combined with ``segmentation``.
        astar: Selects ``ExperimentalAstarAligner`` instead of
            ``EditGraphAligner``.
        detect_transpositions: Forwarded to ``EditGraphAligner`` only.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Forwarded to ``EditGraphAligner`` only.
        indent: Forwarded to the TEI exporter.

    Raises:
        SegmentationError: If ``near_match`` and ``segmentation`` are both set.
            The check happens after the graph has been built.
        Exception: If ``output`` is not one of the names listed above.
    """
    # A dict is the JSON-style input form; turn it into a real collation.
    # No validation is done here: a dict without "witnesses" raises KeyError.
    if isinstance(collation, dict):
        witness_source = collation
        collation = Collation()
        for witness in witness_source["witnesses"]:
            collation.add_witness(witness)

    # The aligner is always created with near_match=False; near matching is
    # applied further down, on the finished graph and its ranking.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    # Build the variant graph and rank its vertices.
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Near matching works on the unsegmented graph and ranking built above.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)
    elif segmentation:
        # Join parallel segments, then rank the modified graph again.
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Graph-based outputs do not need an alignment table.
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # Every remaining output is derived from the alignment table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    elif output == "html":
        return display_alignment_table_as_html(table)
    elif output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    elif output == "table":
        return table
    elif output == "xml":
        return export_alignment_table_as_xml(table)
    elif output == "tei":
        return export_alignment_table_as_tei(table, indent)
    elif output in ("csv", "tsv"):
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
def create_from_dict(cls, data, limit=None):
    """Build a ``Collation`` from JSON-style input.

    Args:
        cls: Unused; a plain ``Collation`` is always instantiated.
        data: Container that must include a ``"witnesses"`` entry holding a
            sliceable sequence of witnesses.
        limit: Upper bound for the slice of witnesses that is added;
            ``None`` adds all of them.

    Returns:
        The new collation. Its ``pretokenized`` attribute is set to ``True``
        as soon as an added witness contains a ``'tokens'`` entry.

    Raises:
        UnsupportedError: If ``data`` has no ``"witnesses"`` entry.
    """
    if "witnesses" not in data:
        raise UnsupportedError("Json input not valid")

    all_witnesses = data["witnesses"]
    built = Collation()
    for entry in all_witnesses[:limit]:
        built.add_witness(entry)
        # A witness that carries its own 'tokens' marks the input as pretokenized.
        if 'tokens' in entry:
            built.pretokenized = True
    return built
def get_tokenized_at(table, token_list, segmentation=False, layout="horizontal"):
    """Rebuild an alignment table so that its cells hold full token entries.

    The rows of ``table`` are paired, in order, with the entries of
    ``token_list``. For each row a new ``Row`` with the same header is
    created. A cell equal to ``"-"`` becomes the placeholder ``{"t": "-"}``;
    any other cell is replaced by the next not-yet-used entry of that row's
    token sequence.

    Args:
        table: Alignment table whose ``rows`` are read.
        token_list: One token sequence per row of ``table``.
        segmentation: Accepted but not used; only the unsegmented case is
            implemented (see the TODO at the end of the body).
        layout: Forwarded to the new ``AlignmentTable``.

    Returns:
        A new ``AlignmentTable`` created on an empty ``Collation``.
    """
    rebuilt = AlignmentTable(Collation(), layout=layout)
    for source_row, row_tokens in zip(table.rows, token_list):
        target_row = Row(source_row.header)
        rebuilt.rows.append(target_row)
        used = 0  # index of the next entry of row_tokens to hand out
        for cell in source_row.cells:
            if cell == "-":
                # TODO: should probably be null or None instead, but that
                # would break the table/html rendering at the moment.
                target_row.cells.append({"t": "-"})
                continue
            target_row.cells.append(row_tokens[used])
            used += 1
    # TODO: segmentation=True is not supported. The approach sketched by the
    # original author: a cell may cover several tokens, so keep appending the
    # token_string of the following tokens (separated by a space) until the
    # text equals the cell, then store the token_data of all of them. That
    # requires token_list to hold Token objects rather than dicts, and would
    # not work with the current table/html output.
    return rebuilt
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False,
            astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None,
            svg_output=None, indent=False, scheduler=None):
    """Align the witnesses of a collation and return the result in the requested format.

    Args:
        collation: A collation object, or a dict with a ``"witnesses"`` list.
            A dict is converted by adding each witness to a new ``Collation``.
        output: Name of the result format. ``"svg"`` / ``"svg_simple"`` render
            the variant graph, ``"graph"`` returns the graph itself, and
            ``"json"``, ``"html"``, ``"html2"``, ``"table"``, ``"xml"`` and
            ``"tei"`` are produced from an ``AlignmentTable``.
        layout: Passed through to ``AlignmentTable``.
        segmentation: When true, ``join`` is applied to the graph and the
            ranking is recomputed.
        near_match: When true, ``process_rank`` is run for the ranks below the
            end vertex's rank. Cannot be combined with ``segmentation``.
        astar: Selects ``ExperimentalAstarAligner`` instead of
            ``EditGraphAligner``.
        detect_transpositions: Forwarded to ``EditGraphAligner`` only.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Forwarded to ``EditGraphAligner`` only.
        svg_output: Forwarded to the SVG renderer.
        indent: Forwarded to the TEI exporter.
        scheduler: Object handed to ``process_rank``. When ``None`` (the
            default) a new ``Scheduler`` is created for this call, so calls
            that omit it do not share one instance.

    Raises:
        SegmentationError: If ``near_match`` and ``segmentation`` are both set.
            The check happens after the graph has been built.
        Exception: If ``output`` is not one of the names listed above.
    """
    # A dict is the JSON-style input form; turn it into a real collation.
    # No validation is done here: a dict without "witnesses" raises KeyError.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # The aligner is always created with near_match=False; near matching is
    # applied further down, rank by rank, on the finished graph.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    # Build the variant graph and rank its vertices.
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation is not supported for near matching.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        # Created here rather than as a default argument: a default is
        # evaluated once, at definition time, and would be shared by all calls.
        if scheduler is None:
            scheduler = Scheduler()
        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)
        # Do-while shape: the body runs at least once. The next rank comes
        # from process_rank's return value instead of iterating over the
        # ranking, which is being modified while it is processed.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if not rank > 0:
                break

    # Join parallel segments, then rank the modified graph again.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Graph-based outputs do not need an alignment table.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # Every remaining output is derived from the alignment table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False,
            astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None,
            indent=False):
    """Align the witnesses of a collation and return the result in the requested format.

    Args:
        collation: A collation object, or a dict with a ``"witnesses"`` list.
            A dict is converted by adding each witness to a new ``Collation``.
        output: Name of the result format. ``"svg"`` / ``"svg_simple"`` render
            the variant graph, ``"graph"`` returns the graph itself, and
            ``"json"``, ``"html"``, ``"html2"``, ``"table"``, ``"xml"``,
            ``"tei"``, ``"csv"`` and ``"tsv"`` are produced from an
            ``AlignmentTable``.
        layout: Passed through to ``AlignmentTable``, ``get_tokenized_at`` and
            the JSON exporter.
        segmentation: When true, ``join`` is applied to the graph and the
            ranking is recomputed.
        near_match: When true, the ranking is replaced by the result of
            ``perform_near_match``. Cannot be combined with ``segmentation``.
        astar: Selects ``ExperimentalAstarAligner`` instead of
            ``EditGraphAligner``.
        detect_transpositions: Forwarded to ``EditGraphAligner`` only.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Forwarded to ``EditGraphAligner`` only.
        indent: Forwarded to the TEI exporter.

    Raises:
        SegmentationError: If ``near_match`` and ``segmentation`` are both set.
            The check happens after the graph has been built.
        Exception: If ``output`` is not one of the names listed above.
    """
    # A dict is the JSON-style input form; turn it into a real collation.
    # No validation is done here: a dict without "witnesses" raises KeyError.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # The aligner is always created with near_match=False; near matching is
    # applied further down, on the finished graph and its ranking.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    # Build the variant graph and rank its vertices.
    graph = VariantGraph()
    algorithm.collate(graph)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Near matching works on the unsegmented graph and ranking built above.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)

    # Join parallel segments, then rank the modified graph again.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Graph-based outputs do not need an alignment table.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # Hand the ranking computed above to the table. Previously it was
    # computed (and replaced by perform_near_match) but never used afterwards.
    # NOTE(review): assumes AlignmentTable takes the ranking as its fourth
    # positional argument - confirm against its definition.
    table = AlignmentTable(collation, graph, layout, ranking)

    if collation.pretokenized and not segmentation:
        # Only the unsegmented case is handled; get_tokenized_at might need to
        # behave differently with segmentation=True.
        token_list = [[tk.token_data for tk in witness.tokens()] for witness in collation.witnesses]
        table = get_tokenized_at(table, token_list, segmentation=segmentation, layout=layout)
        # For display purposes, "table" and "html" output keep only the "t"
        # value (a string) of each cell instead of the full token_data dict.
        if output == "table" or output == "html":
            for row in table.rows:
                row.cells = [cell["t"] for cell in row.cells]

    if output == "json":
        return export_alignment_table_as_json(table, layout=layout)
    if output == "html":
        return display_alignment_table_as_html(table)
    if output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    if output == "csv" or output == "tsv":
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False,
            astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None,
            svg_output=None, indent=False, scheduler=None):
    """Collate the witnesses and return the alignment in the format named by ``output``.

    Args:
        collation: Either a collation object or a dict holding a
            ``"witnesses"`` list; each witness of a dict is added to a new
            ``Collation``.
        output: ``"svg"`` or ``"svg_simple"`` (rendered variant graph),
            ``"graph"`` (the variant graph object), or one of the
            table-based formats ``"json"``, ``"html"``, ``"html2"``,
            ``"table"``, ``"xml"``, ``"tei"``.
        layout: Given to ``AlignmentTable``.
        segmentation: If true, the graph is passed to ``join`` and ranked
            again afterwards.
        near_match: If true, ``process_rank`` is called repeatedly, starting
            one rank below the end vertex. Must not be combined with
            ``segmentation``.
        astar: If true, ``ExperimentalAstarAligner`` is used; otherwise
            ``EditGraphAligner``.
        detect_transpositions: Given to ``EditGraphAligner`` only.
        debug_scores: Given to the selected aligner.
        properties_filter: Given to ``EditGraphAligner`` only.
        svg_output: Given to the SVG renderer.
        indent: Given to the TEI exporter.
        scheduler: Passed to ``process_rank``. ``None`` (the default) means a
            fresh ``Scheduler`` is created inside this call rather than one
            instance being shared by every call that omits the argument.

    Raises:
        SegmentationError: When both ``near_match`` and ``segmentation`` are
            set (raised only after the graph has been built).
        Exception: When ``output`` names no known format.
    """
    # JSON-style dict input: build a real collation from its witnesses.
    # There is no error trapping; a missing "witnesses" key raises KeyError.
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # Both aligners get near_match=False; near matching happens below, after
    # the graph exists.
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)

    # Build and rank the variant graph.
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Near matching cannot be combined with segmentation.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        # A default argument value is evaluated only once, when the function
        # is defined; creating the scheduler here gives each call its own.
        if scheduler is None:
            scheduler = Scheduler()
        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)
        # Loop body executes at least once (do-while). process_rank returns
        # the rank to continue from, so the ranking is never iterated over
        # while it is being changed.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if not rank > 0:
                break

    # Join parallel segments and refresh the ranking for the joined graph.
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Outputs that come straight from the graph.
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # Outputs that come from the alignment table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False,
            astar=False, detect_transpositions=False, debug_scores=False, properties_filter=None,
            indent=False):
    """Collate the witnesses and return the alignment in the format named by ``output``.

    Args:
        collation: Either a collation object or a dict holding a
            ``"witnesses"`` list; each witness of a dict is added to a new
            ``Collation``.
        output: ``"svg"`` or ``"svg_simple"`` (rendered variant graph),
            ``"graph"`` (the variant graph object), or one of the
            table-based formats ``"json"``, ``"html"``, ``"html2"``,
            ``"table"``, ``"xml"``, ``"tei"``, ``"csv"``, ``"tsv"``.
        layout: Given to ``AlignmentTable``.
        segmentation: If true, the graph is passed to ``join`` and ranked
            again afterwards.
        near_match: If true, ``perform_near_match`` supplies the ranking.
            Must not be combined with ``segmentation``.
        astar: If true, ``ExperimentalAstarAligner`` is used; otherwise
            ``EditGraphAligner``.
        detect_transpositions: Given to ``EditGraphAligner`` only.
        debug_scores: Given to the selected aligner.
        properties_filter: Given to ``EditGraphAligner`` only.
        indent: Given to the TEI exporter.

    Raises:
        SegmentationError: When both ``near_match`` and ``segmentation`` are
            set (raised only after the graph has been built).
        Exception: When ``output`` names no known format.
    """
    # JSON-style dict input: build a real collation from its witnesses.
    # There is no error trapping; a missing "witnesses" key raises KeyError.
    if isinstance(collation, dict):
        raw_input = collation
        collation = Collation()
        for witness in raw_input["witnesses"]:
            collation.add_witness(witness)

    # Both aligners get near_match=False; near matching happens below, after
    # the graph exists.
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(collation,
                                   near_match=False,
                                   detect_transpositions=detect_transpositions,
                                   debug_scores=debug_scores,
                                   properties_filter=properties_filter)

    # Build and rank the variant graph.
    graph = VariantGraph()
    aligner.collate(graph)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # The graph (without near-match edges) and its ranking already exist.
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        ranking = perform_near_match(graph, ranking)
    elif segmentation:
        # Join parallel segments and refresh the ranking for the joined graph.
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # Outputs that come straight from the graph.
    if output in ("svg", "svg_simple"):
        return display_variant_graph_as_svg(graph, output)
    if output == "graph":
        return graph

    # Outputs that come from the alignment table.
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    elif output == "html":
        return display_alignment_table_as_html(table)
    elif output == "html2":
        return visualize_table_vertically_with_colors(table, collation)
    elif output == "table":
        return table
    elif output == "xml":
        return export_alignment_table_as_xml(table)
    elif output == "tei":
        return export_alignment_table_as_tei(table, indent)
    elif output in ("csv", "tsv"):
        return display_alignment_table_as_csv(table, output)
    raise Exception("Unknown output type: " + output)