def lgr_set_annotate(lgr, script_lgr, set_labels_input, labels_input):
    """
    Annotate a list of labels with their disposition, in the context of an LGR set.

    The generator first lists the set labels that are rejected (malformed, or
    failing the preliminary eligibility test of the set LGR), then emits the
    annotation lines of every input label.

    :param lgr: The LGR set object.
    :param script_lgr: The LGR object for the script used to check label validity.
    :param set_labels_input: The labels in the lgr set.
    :param labels_input: The file containing the labels
    :return: Generator of output lines.
    """
    # Step 1: keep only the set labels that are usable with the set LGR.
    yield "# The following labels from the set labels are invalid\n"
    usable_set_labels = []
    for set_label, is_valid, problem in read_labels(set_labels_input, lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (set_label, problem)
            continue
        set_label_cp = tuple(ord(char) for char in set_label)
        if lgr._test_preliminary_eligibility(set_label_cp)[0]:
            usable_set_labels.append(set_label)
        else:
            yield "%s: invalid\n" % set_label
    yield "# End of filtered set labels\n\n"

    # Step 2: annotate each input label.
    for label, is_valid, problem in read_labels(labels_input, script_lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (label, problem)
            continue

        label_cp = tuple(ord(char) for char in label)
        # The label is first checked against the LGR of the selected script.
        (eligible, _, invalid_parts, disp, action_idx, _) = script_lgr.test_label_eligible(
            label_cp, collect_log=False)

        collision_note = ''
        if eligible:
            # An eligible label is then checked against the set LGR: it must
            # neither be one of the set labels nor collide with them.
            if label in usable_set_labels:
                collision_note = 'Label is in the LGR set labels'
            colliding = get_collisions(lgr, usable_set_labels + [label], quiet=False)
            if len(colliding) > 0:
                collision_note = 'Label collides with the LGR set labels'

        # TODO: decide whether a collision should turn the disposition into invalid.
        summary = '{} - {}'.format(disp, collision_note) if collision_note else disp

        for line in _out_valid_label(lgr, label, eligible, invalid_parts, summary, action_idx):
            yield line
def annotate(lgr, labels_input):
    """
    Annotate a list of labels with their disposition.

    Labels flagged as invalid by ``read_labels`` are reported with their
    error message instead of a disposition.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    :return: Generator of "<label>: <disposition or error>" lines.
    """
    # read_labels yields (label, valid, error) triples; iterating them as bare
    # labels would hand tuple members to ord() and fail.
    for label, valid, error in read_labels(labels_input, lgr.unicode_database):
        if not valid:
            yield "%s: %s\n" % (label, error)
            continue
        label_cp = tuple(ord(c) for c in label)
        # The disposition is the fourth element of the eligibility result.
        disp = lgr.test_label_eligible(label_cp, collect_log=False)[3]
        yield "%s: %s\n" % (label, disp)
def diff(lgr_1, lgr_2, labels_input, show_collision=True, show_dump=False, quiet=False):
    """
    Show diff for a list of labels between 2 LGR

    Invalid labels and labels that are not in one of the LGRs are reported
    and left out of the comparison.

    :param lgr_1: The first LGR info object.
    :param lgr_2: The second LGR info object.
    :param labels_input: The file containing the labels
    :param show_collision: Output collisions
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    :return: Generator of output lines.
    """
    # read_labels yields (label, valid, error) triples: only valid labels are
    # compared, the others are reported with their error.
    labels = set()
    for label, valid, error in read_labels(labels_input, lgr_1.unicode_database):
        if valid:
            labels.add(label)
        else:
            yield "Label {}: {}\n".format(label, error)

    # get diff between labels and variants for the two LGR
    # keep label without collision as we need to compare
    # _generate_indexes returns the indexes together with the labels not in the LGR
    label1_indexes, not_in_lgr_1 = _generate_indexes(lgr_1, labels, keep=True, quiet=quiet)
    label2_indexes, not_in_lgr_2 = _generate_indexes(lgr_2, labels, keep=True, quiet=quiet)

    if not_in_lgr_1 or not_in_lgr_2:
        for index, not_in_lgr in enumerate([not_in_lgr_1, not_in_lgr_2], 1):
            yield "# Labels not in LGR {} #\n\n".format(index)
            for label_cp in not_in_lgr:
                yield "Label {}\n".format(cp_to_ulabel(label_cp))
            yield '\n'

    # generate a dictionary of indexes per label
    labels_dic = {}
    yield "# LGR comparison #\n"
    for label in labels:
        label_cp = tuple(ord(c) for c in label)
        # A label missing from either LGR cannot be compared: report it and move on
        try:
            index1 = lgr_1.generate_index_label(label_cp)
        except NotInLGR:
            yield "Label {} not in LGR {}\n".format(label, lgr_1)
            continue
        try:
            index2 = lgr_2.generate_index_label(label_cp)
        except NotInLGR:
            yield "Label {} not in LGR {}\n".format(label, lgr_2)
            continue
        labels_dic[label] = (index1, index2)

    for output in _compare(labels_dic, label1_indexes, label2_indexes):
        yield output

    # output collisions
    if show_collision:
        yield "\n\n# Collisions for LGR1 #\n"
        for output in _write_complete_output(label1_indexes):
            yield output
        if show_dump:
            yield "\n# Summary for LGR1 #\n"
            for output in _full_dump(label1_indexes):
                yield output
        yield "\n\n# Collisions for LGR2 #\n"
        for output in _write_complete_output(label2_indexes):
            yield output
        if show_dump:
            yield "\n# Summary for LGR2 #\n\n"
            for output in _full_dump(label2_indexes):
                yield output
def get_collisions(lgr, labels_input, quiet=True):
    """
    Get collisions index in a list of labels for a given LGR

    Labels flagged as invalid by ``read_labels`` are ignored.

    :param lgr: The LGR object
    :param labels_input: The file containing the labels
    :param quiet: Do not get rules
    :return: The indexes for collisions
    """
    from lgr.tools.utils import read_labels

    # Duplicated labels collapse since a set is used.
    valid_labels = {
        label
        for label, is_valid, _ in read_labels(labels_input, lgr.unicode_database)
        if is_valid
    }

    # The second element of the result (labels not in the LGR) is not needed here.
    collision_indexes, _ = _generate_indexes(lgr, valid_labels, keep=False, quiet=quiet)
    return collision_indexes
def annotate(lgr, labels_input):
    """
    Annotate a list of labels with their disposition.

    Invalid labels are reported with their error; valid ones are rendered
    by ``_out_valid_label``.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    :return: Generator of output lines.
    """
    for label, is_valid, problem in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (label, problem)
            continue

        code_points = tuple(ord(char) for char in label)
        (eligible, _, invalid_parts, disposition, action_index, _) = lgr.test_label_eligible(
            code_points, collect_log=False)
        for line in _out_valid_label(lgr, label, eligible, invalid_parts, disposition, action_index):
            yield line
def cross_script_variants(lgr, labels_input):
    """
    Compute cross-script variants of labels.

    As this is a generator, the sanity checks below run when iteration starts.

    :param lgr: The LGR to use for variant generation.
    :param labels_input: The file containing the labels
    :return: Generator of output lines.
    :raises Exception: If the LGR has no metadata or no unicode database attached.
    """
    if lgr.metadata is None:
        logger.error("Cannot generate cross-scripts variants for LGR without metadata")
        raise Exception
    if lgr.unicode_database is None:
        logger.error("Cannot generate cross-scripts variants for LGR without unicode database attached")
        raise Exception

    any_variant = False
    for label, is_valid, problem in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "Input label {}: {}\n".format(label, problem)
            continue

        label_cp = tuple(ord(char) for char in label)
        eligible, _, _, _, _, _ = lgr.test_label_eligible(label_cp)
        if not eligible:
            # Labels that are not eligible are skipped without any output
            continue

        generated = _generate_variants(lgr, label_cp)
        for position, (variant, disp, script_mapping) in enumerate(generated):
            if position == 0:
                # Only display input label if it has x-variants
                yield "Input label {} ({}) has cross-script variants:\n".format(
                    format_cp(label_cp), label)
                any_variant = True
            yield "\t- Cross-variant {} ({}), disposition {}:\n".format(
                format_cp(variant), cp_to_ulabel(variant), disp)
            mapping_lines = [
                "{} ({}): {}\n".format(format_cp(cp), cp_to_ulabel(cp), script)
                for cp, script in script_mapping.items()
            ]
            yield '\t\t+ ' + '\t\t+ '.join(mapping_lines)

    if not any_variant:
        yield 'No cross-script variants for input!'
def collision(lgr, labels_input, show_dump=False, quiet=False):
    """
    Show collisions in a list of labels for a given LGR

    Invalid labels and labels that are not in the LGR are reported before
    the collisions.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    :return: Generator of output lines.
    """
    # read_labels yields (label, valid, error) triples: only valid labels are
    # indexed, the others are reported with their error.
    labels = set()
    for label, valid, error in read_labels(labels_input, lgr.unicode_database):
        if valid:
            labels.add(label)
        else:
            yield "Label {}: {}\n".format(label, error)

    # get diff between labels and variants for the two LGR
    # only keep label without collision for a full dump
    # _generate_indexes returns the indexes together with the labels not in the LGR
    label_indexes, not_in_lgr = _generate_indexes(lgr, labels, keep=show_dump, quiet=quiet)

    if not_in_lgr:
        yield "\n# Labels not in LGR #\n\n"
        for label_cp in not_in_lgr:
            yield "Label {}\n".format(cp_to_ulabel(label_cp))

    # output collisions
    yield "\n# Collisions #\n\n"
    for output in _write_complete_output(label_indexes):
        yield output

    if show_dump:
        yield "\n# Summary #\n\n"
        for output in _full_dump(label_indexes):
            yield output
def collision(lgr, labels_input, show_dump=False, quiet=False):
    """
    Show collisions in a list of labels for a given LGR

    Output order: invalid labels, labels not in the LGR (if any), the
    collisions, and finally the summary when a dump is requested.

    :param lgr: The LGR object.
    :param labels_input: The file containing the labels
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    :return: Generator of output lines.
    """
    from lgr.tools.utils import read_labels

    valid_labels = set()
    for label, is_valid, problem in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "Label {}: {}\n".format(label, problem)
            continue
        valid_labels.add(label)

    # Labels without collision are only kept in the indexes for a full dump
    indexes, missing_labels = _generate_indexes(lgr, valid_labels, keep=show_dump, quiet=quiet)

    if missing_labels:
        yield "\n# Labels not in LGR #\n\n"
        for missing_cp in missing_labels:
            yield "Label {}\n".format(cp_to_ulabel(missing_cp))

    yield "\n# Collisions #\n\n"
    for line in _write_complete_output(indexes):
        yield line

    if not show_dump:
        return

    yield "\n# Summary #\n\n"
    for line in _full_dump(indexes):
        yield line
def main():
    """
    Entry point of the LGR Validate command line tool.

    Parses the command line, loads either a single LGR or an LGR set
    (when ``--lgr-xml`` is given more than once), then runs ``check_label``
    on every line obtained from ``get_stdin()``.

    For an LGR set, ``--lgr-script`` and ``--set-labels`` are mandatory, and
    the set labels are filtered beforehand: rejected ones are listed on the
    output. Errors are logged and make the function return early.
    """
    parser = argparse.ArgumentParser(description='LGR Validate CLI')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries', required=True)
    parser.add_argument('-g', '--variants', action='store_true',
                        help='Generate variants')
    parser.add_argument('-x', '--lgr-xml', metavar='LGR_XML', action='append', required=True,
                        help='The LGR or LGR set if used multiple times')
    parser.add_argument('-s', '--lgr-script', metavar='LGR_SCRIPT',
                        help='If LGR is a set, the script used to validate input labels')
    parser.add_argument('-f', '--set-labels', metavar='SET_LABELS',
                        help='If LGR is a set, the file containing the label of the LGR set')

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # --libs carries three '#'-separated values
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    # More than one LGR file on the command line means an LGR set
    is_lgr_set = len(args.lgr_xml) > 1

    if is_lgr_set:
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        if not args.set_labels:
            logger.error('For LGR set, LGR set labels file is required')
            return

        merged_lgr, lgr_set = merge_lgrs(args.lgr_xml, unidb=unidb)
        if not merged_lgr:
            logger.error('Error while creating the merged LGR')
            return

        # Read the whole file now so it can be closed before processing
        with io.open(args.set_labels, 'r', encoding='utf-8') as set_labels_input:
            set_labels = StringIO(set_labels_input.read())

        script_lgr = None
        for lgr_s in lgr_set:
            try:
                if lgr_s.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning('Script %s is provided in more than one LGR of the set, '
                                       'will only evaluate with %s', args.lgr_script, lgr_s.name)
                    script_lgr = lgr_s
            except (AttributeError, IndexError):
                # Deliberate: an LGR without metadata or without any language
                # simply cannot match the requested script.
                pass

        if not script_lgr:
            logger.error('Cannot find script %s in any of the LGR provided as input', args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                # Validation errors are only logged; parsing is still attempted
                logger.error('Errors for RNG validation: %s', validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    filtered_set_labels = []
    if is_lgr_set:
        write_output("# The following labels from the set labels are invalid\n")
        for label, valid, error in read_labels(set_labels, script_lgr.unicode_database):
            if not valid:
                write_output("{}: {}\n".format(label, error))
                continue
            label_cp = tuple(ord(c) for c in label)
            if not script_lgr._test_preliminary_eligibility(label_cp)[0]:
                # Both values must be passed as a tuple to the % operator:
                # "fmt % label, script_lgr" formats with label only and fails.
                write_output("%s: Not in LGR %s\n" % (label, script_lgr))
            else:
                filtered_set_labels.append(label)
        write_output("# End of filtered set labels\n\n")

    for label in get_stdin().read().splitlines():
        if is_lgr_set:
            check_label(script_lgr, label, args.variants,
                        merged_lgr=merged_lgr, set_labels=filtered_set_labels)
        else:
            check_label(lgr, label, args.variants)
def diff(lgr_1, lgr_2, labels_input, show_collision=True, show_dump=False, quiet=False):
    """
    Show diff for a list of labels between 2 LGR

    Output order: invalid labels, labels not in the LGRs (if any), the
    comparison, then, when requested, the collisions (and summary) of each LGR.

    :param lgr_1: The first LGR info object.
    :param lgr_2: The second LGR info object.
    :param labels_input: The file containing the labels
    :param show_collision: Output collisions
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    :return: Generator of output lines.
    """
    from lgr.tools.utils import read_labels

    valid_labels = set()
    for label, is_valid, problem in read_labels(labels_input, lgr_1.unicode_database):
        if not is_valid:
            yield "Label {}: {}\n".format(label, problem)
            continue
        valid_labels.add(label)

    # Labels without collision are kept in the indexes as they are needed to compare
    indexes_1, missing_1 = _generate_indexes(lgr_1, valid_labels, keep=True, quiet=quiet)
    indexes_2, missing_2 = _generate_indexes(lgr_2, valid_labels, keep=True, quiet=quiet)

    if missing_1 or missing_2:
        for number, missing in enumerate([missing_1, missing_2], 1):
            yield "# Labels not in LGR {} #\n\n".format(number)
            for missing_cp in missing:
                yield "Label {}\n".format(cp_to_ulabel(missing_cp))
            yield '\n'

    # Map each label to its pair of index labels (one per LGR)
    yield "\n# LGR comparison #\n"
    index_pairs = {}
    for label in valid_labels:
        label_cp = tuple(ord(char) for char in label)
        computed = []
        for current_lgr in (lgr_1, lgr_2):
            try:
                computed.append(current_lgr.generate_index_label(label_cp))
            except NotInLGR:
                # Report and give up on this label: the other LGR is not queried
                yield "Label {} not in LGR {}\n".format(label, current_lgr)
                break
        else:
            index_pairs[label] = tuple(computed)

    for line in _compare(index_pairs, indexes_1, indexes_2):
        yield line

    if not show_collision:
        return

    # (collisions title, summary title, indexes) for each LGR
    sections = (
        ("\n\n# Collisions for LGR1 #\n", "\n# Summary for LGR1 #\n", indexes_1),
        ("\n\n# Collisions for LGR2 #\n", "\n# Summary for LGR2 #\n\n", indexes_2),
    )
    for collisions_title, summary_title, indexes in sections:
        yield collisions_title
        for line in _write_complete_output(indexes):
            yield line
        if show_dump:
            yield summary_title
            for line in _full_dump(indexes):
                yield line