def convert_wb_to_glossary_files(path_to_file):
    """Export one tab-separated glossary file per target language.

    Reads the ``bcp47`` sheet of the workbook at ``path_to_file``, drops the
    row whose index label is 0 (the row with country codes) and, for every
    column whose header passes ``tags.check`` and is not the source column
    ``'en'``, writes ``COFOE_<column>_glossary.utf8`` into the module-level
    ``output_dir``. Each output row is ``source<TAB>target``.

    :param path_to_file: path of the Excel workbook to convert.
    """
    df = pd.read_excel(path_to_file, sheet_name='bcp47')
    data = df.drop([0])  # remove row with country codes
    headers = [*data]
    print(headers)

    container = "COFOE"
    source_col = 'en'

    for col in data.columns:
        if not tags.check(col) or col == source_col:
            continue
        # Going through a dict means a repeated source term keeps only its
        # last translation.
        lang_pair = dict(zip(data[source_col], data[col]))
        path_to_target_file = os.path.join(
            output_dir, f'{container}_{col}_glossary.utf8')
        # Explicit UTF-8 so the content matches the ".utf8" file name on every
        # platform; newline='' is what the csv module requires so that it can
        # control line endings itself.
        with open(path_to_target_file, 'w', encoding='utf-8',
                  newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter='\t')
            writer.writerows(lang_pair.items())
def set_languages(self, languages, force=False):
    """
    Convenience function to update the languages in the metadata.

    Tags for which the check raises ``UnicodeEncodeError`` are logged and
    left out of the stored list; they do not by themselves trigger the
    exception. The ``languages`` argument itself is never modified.

    :param iterable languages: a collection of language tags
                               as described in [RFC5646].
    :param bool force: If True, add the languages even if
                       format is invalid.
    :raises LGRFormatException: if the language parameter
                                has an invalid format.
    """
    # Collect the tags to keep in a fresh list instead of removing entries
    # from `languages` while iterating over it: in-place removal skips the
    # element that follows each removed one, mutates the caller's object and
    # fails for iterables without `remove` (tuples, generators).
    kept = []
    found_error = False
    for language in languages:
        try:
            if not rfc5646.check(language):
                logger.log(logging.WARNING if force else logging.ERROR,
                           "Invalid language: '%s'", language)
                found_error = True
        except UnicodeEncodeError:
            # Can't skip this one
            logger.error("Invalid non-ASCII language tag '%s'", language)
            continue
        kept.append(language)

    if found_error and not force:
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)
    self.languages = kept
def validate_fhir_attachment(self):
    """Validate the attachment held in ``self.values``.

    Every problem found is collected; if there is at least one, a single
    ``ValueError`` carrying the list of messages is raised.

    :returns: ``self.values`` when no problem was found.
    :raises ValueError: with the list of collected error messages.
    """
    problems = []

    if self.values.data:
        if not self.values.contentType:
            problems.append(
                "content_type must be populated if data is provided")
        mimetypes.init()
        extension = mimetypes.guess_extension(str(self.values.contentType))
        if extension not in VALID_ATTACHMENT_EXTENSIONS:
            problems.append("The uploaded file type not supported")

    if self.values.language and not tags.check(self.values.language):
        problems.append("the language code must be valid")

    if self.values.hash and self.values.data:
        decoded = base64.b64decode(self.values.data)
        expected_digest = hashlib.sha1(decoded).digest()
        # NOTE(review): the comparison is against the raw SHA-1 digest while
        # the message speaks of a base64 hash - confirm what form
        # `self.values.hash` has at this point.
        if self.values.hash != expected_digest:
            problems.append(
                "the hash must be a base64 sha-1 hash of the data")

    if problems:
        raise ValueError(problems)
    return self.values
def add_language(self, language, force=False):
    """
    Add a language handled by the LGR to the metadata list.

    Ensure the language is a valid RFC 5646 language tag.

    3.3.3.  The language Element
    The value of the "language" element MUST be a valid language tag as
    described in [RFC5646].

    :param str language: A new language of the LGR.
    :param bool force: If True, add the language even if
                       format is invalid.
    :raises LGRFormatException: if the language parameter
                                has an invalid format.
    """
    reason = LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG
    try:
        if not rfc5646.check(language):
            # With `force` an invalid tag is only worth a warning.
            level = logging.WARNING if force else logging.ERROR
            logger.log(level, "Invalid language: '%s'", language)
            if not force:
                raise LGRFormatException(reason)
        self.languages.append(language)
    except UnicodeEncodeError:
        # Can't skip this one
        logger.error("Invalid non-ASCII language tag '%s'", language)
        raise LGRFormatException(reason)
def setBookLang(string):
    """Store ``string`` in the module-level ``booklang``.

    If ``tags.check`` rejects ``string``, a message is printed and the
    process exits with status 1 instead.
    """
    global booklang
    if tags.check(string):
        booklang = string
        return
    print(
        'The specified book language tag is not a BCP 47 language tag. Exiting.'
    )
    sys.exit(1)
def setXmlLang(string):
    """Store ``string`` in the module-level ``xmllang``.

    If ``tags.check`` rejects ``string``, a message is printed and the
    process exits with status 1 instead.
    """
    global xmllang
    if tags.check(string):
        xmllang = string
        return
    print(
        'The specified XML language tag is not a BCP 47 language tag. Exiting.'
    )
    sys.exit(1)
def _scrub_language(self, language):
    """Return ``language`` if ``tags.check`` accepts it, else ``'und'``.

    A warning is logged whenever the fallback is used.

    :param language: the language tag to check.
    :returns: ``language`` unchanged, or the string ``'und'``.
    """
    if tags.check(language):
        return language
    # `warning` replaces the deprecated `warn` alias; passing the argument
    # separately defers the formatting to the logging framework.
    log.warning(
        'Encountered an invalid language %s. Falling back to "und".',
        language)
    return 'und'
def standardize_language(code):
    """Match `code` to a standard RFC5646 or RFC3066 language.

    The following approaches are tried in order:

    * Match a RFC5646 language string.
    * Look `code` up as an ISO 639-2 terminological code and match the
      corresponding ISO 639-1 code.
    * Use an ISO 639-2 bibliographic synonym to find the terminological
      code, and match the corresponding ISO 639-1 code.

    If no results are found, `None` is returned.

    http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dclanguage
    http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.12

    :param code: string with a language code ('en-GB', ...)
    :returns: `LanguageTuple` with the lower-cased code and the list of
        description tags, or `None` if the language could not be identified.
    """
    if not code:
        return None

    # Try RFC5646 (for EPUB 3).
    if tags.check(code):
        return LanguageTuple(code=code.lower(),
                             description=tags.description(code))

    # Try RFC3066 (for EPUB 2): get the ISO 639-1 code for the language.
    try:
        new_code = languages.get(iso639_2T_code=code).iso639_1_code
    except KeyError:
        # Not a terminological code; fall back to the synonym table.
        if code not in ISO_6639_2_B:
            return None
        try:
            new_code = languages.get(
                iso639_2T_code=ISO_6639_2_B[code]).iso639_1_code
        except KeyError:
            return None

    # The ISO 639-1 code must itself pass the tag check.
    if not tags.check(new_code):
        return None
    return LanguageTuple(code=new_code.lower(),
                         description=tags.description(new_code))
def languagetag_isvalid_rule(node, language_tag, errors):
    """
    Check that a languagetag is a valid IANA language tag.

    When ``tags.check`` rejects the tag, a ``colander.Invalid`` for ``node``
    listing the messages of the tag's errors is appended to ``errors``.
    """
    if tags.check(language_tag):
        return
    messages = [err.message for err in tags.tag(language_tag).errors]
    errors.append(colander.Invalid(
        node,
        'Invalid language tag: %s' % ", ".join(messages)
    ))
def languagetag_isvalid_rule(node, language_tag, errors):
    """
    Check that a languagetag is a valid IANA language tag.

    When ``tags.check`` rejects the tag, a ``colander.Invalid`` for ``node``
    listing the messages of the tag's errors is appended to ``errors``.
    """
    if tags.check(language_tag):
        return
    reasons = ", ".join(
        err.message for err in tags.tag(language_tag).errors)
    invalid = colander.Invalid(node, 'Invalid language tag: %s' % reasons)
    errors.append(invalid)
def __init__(self, label, type="prefLabel", language="und"):
    """Store the label, its type and its language tag.

    A falsy ``language`` is replaced by ``'und'`` before it is checked.

    :raises ValueError: if ``tags.check`` rejects the language tag.
    """
    self.label = label
    self.type = type
    tag = language or 'und'
    if not tags.check(tag):
        raise ValueError('%s is not a valid IANA language tag.' % tag)
    self.language = tag
def get(self):
    """Return the result of ``game_repository.all_v2`` for the request.

    Query parameters read: ``language`` (default ``'en'``), ``page``
    (int, default 1) and ``page-size`` (int, default 10).
    """
    query = flask.request.args
    lang = query.get('language', default='en')
    page = query.get('page', default=1, type=int)
    page_size = query.get('page-size', default=10, type=int)

    formatted_lang = tags.tag(lang).format
    # Only a tag that passes the check gets a name: its first description.
    if tags.check(lang):
        lang_name = tags.description(lang)[0]
    else:
        lang_name = None

    return game_repository.all_v2(formatted_lang, lang_name, page, page_size)
def __init__(self, note, type="note", language="und", markup=None):
    """Store the note, its type, its language tag and its markup.

    A falsy ``language`` is replaced by ``'und'`` before it is checked.

    :raises ValueError: if ``tags.check`` rejects the language tag, or if
        ``self.is_valid_markup`` rejects ``markup``.
    """
    self.note = note
    self.type = type

    tag = language or 'und'
    if not tags.check(tag):
        raise ValueError('%s is not a valid IANA language tag.' % tag)
    self.language = tag

    if not self.is_valid_markup(markup):
        raise ValueError('%s is not valid markup.' % markup)
    self.markup = markup
def label_lang_rule(errors, node, request, labels):
    """Check the language tag of every label.

    A tag rejected by ``tags.check`` adds a ``colander.Invalid`` for
    ``node['labels']`` to ``errors``. An accepted tag for which the query on
    ``request.db`` counts no ``Language`` row is added to ``request.db``.
    """
    for label in labels:
        language_tag = label['language']
        if tags.check(language_tag):
            known = request.db.query(Language).filter_by(
                id=language_tag).count()
            if not known:
                name = ', '.join(tags.description(language_tag))
                request.db.add(Language(id=language_tag, name=name))
            continue
        messages = [err.message for err in tags.tag(language_tag).errors]
        errors.append(colander.Invalid(
            node['labels'],
            'Invalid language tag: %s' % ", ".join(messages)
        ))
def label_lang_rule(errors, node, request, labels):
    """Check the language tag of every label.

    A tag rejected by ``tags.check`` adds a ``colander.Invalid`` for
    ``node['labels']`` to ``errors``. An accepted tag for which the query on
    ``request.db`` counts no ``Language`` row is added to ``request.db``.
    """
    for label in labels:
        language_tag = label['language']
        if tags.check(language_tag):
            matching = request.db.query(Language).filter_by(id=language_tag)
            if not matching.count():
                descriptions = ', '.join(tags.description(language_tag))
                new_language = Language(id=language_tag, name=descriptions)
                request.db.add(new_language)
            continue
        reasons = ", ".join(
            err.message for err in tags.tag(language_tag).errors)
        errors.append(
            colander.Invalid(node['labels'],
                             'Invalid language tag: %s' % reasons))
def _check_language(language_tag, session):
    '''
    Checks if a certain language is already present, if not import.

    A falsy tag is treated as ``'und'``.

    :param string language_tag: IANA language tag
    :param session: Database session to use
    :rtype: :class:`skosprovider_sqlalchemy.models.Language`
    :raises ValueError: if the tag is not stored yet and is invalid.
    '''
    tag = language_tag or 'und'
    language = session.query(LanguageModel).get(tag)
    if language:
        return language
    if not tags.check(tag):
        raise ValueError('Unable to import provider. Invalid language tag: %s' % tag)
    language = LanguageModel(id=tag, name=', '.join(tags.description(tag)))
    session.add(language)
    return language
def label_lang_rule(errors, node, languages_manager, labels):
    """
    Checks that languages of a label are valid.

    Checks that they are valid IANA language tags. If the language tag was not
    already present in the database, it adds them.
    """
    for label in labels:
        language_tag = label['language']
        if tags.check(language_tag):
            if not languages_manager.count_languages(language_tag):
                name = ', '.join(tags.description(language_tag))
                languages_manager.save(Language(id=language_tag, name=name))
            continue
        messages = [err.message for err in tags.tag(language_tag).errors]
        errors.append(colander.Invalid(
            node['labels'],
            'Invalid language tag: %s' % ", ".join(messages)
        ))
def _check_language(language_tag, session):
    '''
    Checks if a certain language is already present, if not import.

    A falsy tag is treated as ``'und'``.

    :param string language_tag: IANA language tag
    :param session: Database session to use
    :rtype: :class:`skosprovider_sqlalchemy.models.Language`
    :raises ValueError: if the tag is not stored yet and is invalid.
    '''
    tag = language_tag if language_tag else 'und'
    existing = session.query(LanguageModel).get(tag)
    if existing:
        return existing

    if not tags.check(tag):
        raise ValueError(
            'Unable to import provider. Invalid language tag: %s' % tag)

    descriptions = ', '.join(tags.description(tag))
    created = LanguageModel(id=tag, name=descriptions)
    session.add(created)
    return created
def label_lang_rule(errors, node, languages_manager, labels):
    """
    Checks that languages of a label are valid.

    Checks that they are valid IANA language tags. If the language tag was not
    already present in the database, it adds them.
    """
    for label in labels:
        language_tag = label['language']
        if tags.check(language_tag):
            already_stored = languages_manager.count_languages(language_tag)
            if not already_stored:
                descriptions = ', '.join(tags.description(language_tag))
                new_language = Language(id=language_tag, name=descriptions)
                languages_manager.save(new_language)
            continue
        reasons = ", ".join(
            err.message for err in tags.tag(language_tag).errors)
        errors.append(
            colander.Invalid(node['labels'],
                             'Invalid language tag: %s' % reasons))
def _validate_language(value):
    """Return ``False`` for non-strings, else ``language_tags.check(value)``."""
    if not isinstance(value, six.string_types):
        return False
    return language_tags.check(value)
def test_checks(self):
    """``tags.check`` gives a truthy result for the tag ``'en'``."""
    outcome = tags.check('en')
    self.assertTrue(outcome)
def check_lang_tag(self, tag):
    """Print the language tag warning when ``tags.check`` rejects ``tag``."""
    if tags.check(tag):
        return
    self.print_language_tag_warning(tag)
def check_language_tag(self, key, value, metadata):
    """Drop a language entry from ``metadata`` when its value is rejected.

    Only the keys ``'lang'`` and ``'@language'`` are considered. When
    ``tags.check`` rejects ``value``, ``metadata[key]`` is deleted and the
    language tag warning is printed for ``value``.
    """
    if key not in ('lang', '@language'):
        return
    if tags.check(value):
        return
    del metadata[key]
    self.print_language_tag_warning(value)
def is_valid_ietf_language(language):
    """Return the result of ``tags.check`` for ``language``."""
    verdict = tags.check(language)
    return verdict