示例#1
0
def convert_wb_to_glossary_files(path_to_file):
    """Write one tab-separated glossary file per target-language column.

    Reads the ``bcp47`` sheet of the workbook at ``path_to_file`` and drops
    the row with index label 0. For every remaining column whose header
    passes ``tags.check`` and is not the ``en`` source column, the
    source/target pairs are written to
    ``<output_dir>/COFOE_<column>_glossary.utf8``.

    Pairs are collected in a dict keyed by the source term, so when a source
    term occurs more than once only its last translation is written.

    :param path_to_file: path of the Excel workbook to convert.
    """
    df = pd.read_excel(path_to_file, sheet_name='bcp47')
    data = df.drop([0])  # remove row with country codes

    headers = [*data]
    print(headers)

    container = "COFOE"
    source_col = 'en'

    for col in data.columns:
        if not tags.check(col) or col == source_col:
            continue

        lang_pair = dict(zip(data[source_col], data[col]))
        path_to_target_file = os.path.join(
            output_dir, f'{container}_{col}_glossary.utf8')

        # The file is advertised as UTF-8, so do not rely on the platform
        # default encoding; newline='' lets the csv writer control line
        # endings itself (avoids '\r\r\n' on Windows).
        with open(path_to_target_file, 'w', encoding='utf-8',
                  newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter='\t')
            writer.writerows(lang_pair.items())
示例#2
0
    def set_languages(self, languages, force=False):
        """
        Convenience function to update the languages in the metadata.

        Tags for which the check raises ``UnicodeEncodeError`` are logged and
        left out of the stored list. The ``languages`` argument itself is
        never modified.

        :param iterable languages: a collection of language tags as described in [RFC5646].
        :param bool force: If True, add the languages even if format is invalid.
        :raises LGRFormatException: if the language parameter
                                    has an invalid format.
        """
        # Collect into a fresh list instead of removing from `languages`
        # while iterating over it: in-place removal skips the element that
        # follows the removed one and fails for iterables without remove().
        kept = []
        found_error = False
        for language in languages:
            try:
                is_valid = rfc5646.check(language)
            except UnicodeEncodeError:
                # Can't skip this one
                logger.error("Invalid non-ASCII language tag '%s'", language)
                continue
            if not is_valid:
                logger.log(logging.WARNING if force else logging.ERROR,
                           "Invalid language: '%s'", language)
                found_error = True
            kept.append(language)

        if found_error and not force:
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)
        self.languages = kept
示例#3
0
    def validate_fhir_attachment(self):
        """Validate the attachment stored in ``self.values``.

        All problems are collected before reporting, so a single call
        surfaces every failed rule at once.

        :returns: ``self.values`` when no rule failed.
        :raises ValueError: with the list of error messages otherwise.
        """
        attachment = self.values
        problems = []

        if attachment.data:
            if not attachment.contentType:
                problems.append(
                    "content_type must be populated if data is provided")

            mimetypes.init()
            extension = mimetypes.guess_extension(str(attachment.contentType))
            if extension not in VALID_ATTACHMENT_EXTENSIONS:
                problems.append("The uploaded file type not supported")

        if attachment.language and not tags.check(attachment.language):
            problems.append("the language code must be valid")

        if attachment.hash and attachment.data:
            decoded = base64.b64decode(attachment.data)
            expected_digest = hashlib.sha1(decoded).digest()
            # NOTE(review): the comparison is against the raw digest bytes
            # while the message mentions a base64 hash - confirm the
            # encoding `hash` is expected to arrive in.
            if attachment.hash != expected_digest:
                problems.append(
                    "the hash must be a base64 sha-1 hash of the data")

        if problems:
            raise ValueError(problems)

        return attachment
示例#4
0
    def add_language(self, language, force=False):
        """
        Append a language to the metadata language list.

        The tag is checked against RFC 5646 first:

        3.3.3.  The language Element
        The value of the "language"
        element MUST be a valid language tag as described in [RFC5646].

        An invalid tag is logged and, unless ``force`` is set, rejected.
        A tag for which the check raises ``UnicodeEncodeError`` is always
        rejected.

        :param str language: A new language of the LGR.
        :param bool force: If True, add the language even if format is invalid.
        :raises LGRFormatException: if the language parameter
                                    has an invalid format.
        """
        try:
            well_formed = rfc5646.check(language)
        except UnicodeEncodeError:
            # Can't skip this one
            logger.error("Invalid non-ASCII language tag '%s'", language)
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)

        if not well_formed:
            level = logging.WARNING if force else logging.ERROR
            logger.log(level, "Invalid language: '%s'", language)
            if not force:
                raise LGRFormatException(
                    LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG
                )

        self.languages.append(language)
示例#5
0
def setBookLang(string):
    """Store ``string`` in the module-level ``booklang``.

    If ``tags.check`` rejects the value, a message is printed and the
    process exits with status 1.
    """
    global booklang
    is_bcp47 = tags.check(string)
    if not is_bcp47:
        message = (
            'The specified book language tag is not a BCP 47 language tag. Exiting.'
        )
        print(message)
        sys.exit(1)
    booklang = string
示例#6
0
def setXmlLang(string):
    """Store ``string`` in the module-level ``xmllang``.

    If ``tags.check`` rejects the value, a message is printed and the
    process exits with status 1.
    """
    global xmllang
    is_bcp47 = tags.check(string)
    if not is_bcp47:
        message = (
            'The specified XML language tag is not a BCP 47 language tag. Exiting.'
        )
        print(message)
        sys.exit(1)
    xmllang = string
示例#7
0
 def _scrub_language(self, language):
     """Return ``language`` when ``tags.check`` accepts it, else ``'und'``.

     A rejected value is reported through ``log`` before falling back.
     """
     if tags.check(language):
         return language
     # `warn` is a deprecated alias of `warning` on stdlib loggers; passing
     # the value as a logging argument also avoids a TypeError from eager
     # %-formatting when `language` happens to be a tuple.
     log.warning(
         'Encountered an invalid language %s. Falling back to "und".',
         language)
     return 'und'
示例#8
0
def standardize_language(code):
    """Resolve `code` to a standard RFC5646 or RFC3066 language.

    The following attempts are made, in order:
    * `code` itself is checked as a RFC5646 language tag.
    * `code` is looked up as an ISO 639-2/T code and the ISO 639-1 code
    found for it is checked.
    * `code` is translated through the `ISO_6639_2_B` synonym table
    (bibliographic to terminological code) and the lookup is repeated.
    When none of them succeeds, `None` is returned.

    http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dclanguage
    http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.12

    :param code: string with a language code ('en-GB', ...)
    :returns: `LanguageTuple` with the lower-cased code and the list of
    description tags, or `None` if the language could not be identified.
    """
    if not code:
        return None

    # RFC5646 (EPUB 3): the code may already be a valid tag.
    if tags.check(code):
        return LanguageTuple(code=code.lower(),
                             description=tags.description(code))

    # RFC3066 (EPUB 2): go through the ISO639-1 code of the language.
    try:
        new_code = languages.get(iso639_2T_code=code).iso639_1_code
    except KeyError:
        # Unknown as a terminological code; fall back to the synonym table.
        if code not in ISO_6639_2_B:
            return None
        try:
            synonym = languages.get(iso639_2T_code=ISO_6639_2_B[code])
            new_code = synonym.iso639_1_code
        except KeyError:
            return None

    # The ISO639-1 code must itself pass the RFC5646 check.
    if not tags.check(new_code):
        return None
    return LanguageTuple(code=new_code.lower(),
                         description=tags.description(new_code))
示例#9
0
def languagetag_isvalid_rule(node, language_tag, errors):
    """
    Append a ``colander.Invalid`` to ``errors`` when ``language_tag`` is not
    a valid IANA language tag; do nothing otherwise.
    """
    if tags.check(language_tag):
        return
    # Collect the messages of every error reported for the tag.
    reasons = ", ".join(
        [problem.message for problem in tags.tag(language_tag).errors])
    errors.append(colander.Invalid(node, 'Invalid language tag: %s' % reasons))
示例#10
0
def languagetag_isvalid_rule(node, language_tag, errors):
    """
    Validate ``language_tag`` as an IANA language tag.

    On failure a ``colander.Invalid`` for ``node`` that lists the reported
    error messages is appended to ``errors``.
    """
    if tags.check(language_tag):
        return
    messages = [issue.message for issue in tags.tag(language_tag).errors]
    failure = colander.Invalid(
        node, 'Invalid language tag: %s' % ", ".join(messages))
    errors.append(failure)
示例#11
0
 def __init__(self, label, type="prefLabel", language="und"):
     """Store label, type and language.

     A falsy ``language`` is replaced by ``'und'``. A ``ValueError`` is
     raised when ``tags.check`` rejects the language.
     """
     self.label = label
     self.type = type
     language = language or 'und'
     if not tags.check(language):
         raise ValueError('%s is not a valid IANA language tag.' % language)
     self.language = language
示例#12
0
 def __init__(self, label, type="prefLabel", language="und"):
     """Initialise the label, its type and its language.

     An empty or ``None`` language falls back to ``'und'``; a language that
     ``tags.check`` rejects raises ``ValueError``.
     """
     self.label = label
     self.type = type
     effective_language = language if language else 'und'
     if not tags.check(effective_language):
         raise ValueError(
             '%s is not a valid IANA language tag.' % effective_language)
     self.language = effective_language
示例#13
0
    def get(self):
        """Return a page of games for the language given in the query string.

        Reads ``language`` (default ``'en'``), ``page`` (default 1) and
        ``page-size`` (default 10) from the request arguments. The language
        name passed on is ``None`` when ``tags.check`` rejects the tag.
        """
        query = flask.request.args
        lang = query.get('language', default='en')
        page = query.get('page', default=1, type=int)
        page_size = query.get('page-size', default=10, type=int)

        formatted_lang = tags.tag(lang).format
        if tags.check(lang):
            # Use the first description registered for the tag.
            lang_name = tags.description(lang)[0]
        else:
            lang_name = None

        return game_repository.all_v2(formatted_lang, lang_name, page,
                                      page_size)
示例#14
0
 def __init__(self, note, type="note", language="und", markup=None):
     """Store note, type, language and markup.

     A falsy ``language`` is replaced by ``'und'``. ``ValueError`` is
     raised when ``tags.check`` rejects the language or when
     ``self.is_valid_markup`` rejects the markup; the language is checked
     first.
     """
     self.note = note
     self.type = type
     language = language or 'und'
     if not tags.check(language):
         raise ValueError('%s is not a valid IANA language tag.' % language)
     self.language = language
     if not self.is_valid_markup(markup):
         raise ValueError('%s is not valid markup.' % markup)
     self.markup = markup
示例#15
0
 def __init__(self, note, type="note", language="und", markup=None):
     """Initialise the note with its type, language and markup.

     An empty or ``None`` language falls back to ``'und'``. The language is
     validated with ``tags.check`` before the markup is validated with
     ``self.is_valid_markup``; either failure raises ``ValueError``.
     """
     self.note = note
     self.type = type

     effective_language = language if language else 'und'
     if not tags.check(effective_language):
         raise ValueError(
             '%s is not a valid IANA language tag.' % effective_language)
     self.language = effective_language

     if not self.is_valid_markup(markup):
         raise ValueError('%s is not valid markup.' % markup)
     self.markup = markup
示例#16
0
def label_lang_rule(errors, node, request, labels):
    """Check the language tag of every label.

    An invalid tag appends a ``colander.Invalid`` for ``node['labels']`` to
    ``errors``. A valid tag for which the ``request.db`` query counts no
    ``Language`` row gets a new ``Language`` added to ``request.db``.
    """
    for label in labels:
        language_tag = label['language']

        if tags.check(language_tag):
            known = request.db.query(Language).filter_by(id=language_tag).count()
            if known:
                continue
            name = ', '.join(tags.description(language_tag))
            request.db.add(Language(id=language_tag, name=name))
            continue

        target = node['labels']
        reasons = ", ".join([e.message for e in tags.tag(language_tag).errors])
        errors.append(
            colander.Invalid(target, 'Invalid language tag: %s' % reasons))
示例#17
0
def label_lang_rule(errors, node, request, labels):
    """Validate the ``language`` entry of each label.

    Invalid tags are reported by appending a ``colander.Invalid`` to
    ``errors``. Valid tags that the ``request.db`` query does not find are
    added to ``request.db`` as a new ``Language``.
    """
    for label in labels:
        language_tag = label['language']

        if not tags.check(language_tag):
            labels_node = node['labels']
            messages = [
                problem.message for problem in tags.tag(language_tag).errors
            ]
            errors.append(
                colander.Invalid(
                    labels_node,
                    'Invalid language tag: %s' % ", ".join(messages)))
            continue

        already_stored = request.db.query(Language).filter_by(
            id=language_tag).count()
        if already_stored:
            continue
        # First time this tag is seen: store it with its joined descriptions.
        request.db.add(
            Language(id=language_tag,
                     name=', '.join(tags.description(language_tag))))
示例#18
0
def _check_language(language_tag, session):
    '''
    Return the stored language for a tag, creating it when it is missing.

    A falsy tag is treated as ``'und'``. A newly created language is added
    to the session.

    :param string language_tag: IANA language tag
    :param session: Database session to use
    :rtype: :class:`skosprovider_sqlalchemy.models.Language`
    :raises ValueError: if the language is missing and the tag is invalid.
    '''
    language_tag = language_tag or 'und'

    language = session.query(LanguageModel).get(language_tag)
    if language:
        return language

    if not tags.check(language_tag):
        raise ValueError('Unable to import provider. Invalid language tag: %s' % language_tag)

    language = LanguageModel(
        id=language_tag, name=', '.join(tags.description(language_tag)))
    session.add(language)
    return language
示例#19
0
def label_lang_rule(errors, node, languages_manager, labels):
    """
    Validate the language of every label.

    Each language must be a valid IANA language tag; failures are appended
    to ``errors``. A valid tag that ``languages_manager`` does not count yet
    is saved through it.
    """
    for label in labels:
        language_tag = label['language']

        if tags.check(language_tag):
            if languages_manager.count_languages(language_tag):
                continue
            name = ', '.join(tags.description(language_tag))
            languages_manager.save(Language(id=language_tag, name=name))
            continue

        target = node['labels']
        reasons = ", ".join([e.message for e in tags.tag(language_tag).errors])
        errors.append(
            colander.Invalid(target, 'Invalid language tag: %s' % reasons))
示例#20
0
def _check_language(language_tag, session):
    '''
    Look up a language by tag and import it when it is not present yet.

    ``'und'`` is used when no tag is given. An imported language is added
    to the session.

    :param string language_tag: IANA language tag
    :param session: Database session to use
    :rtype: :class:`skosprovider_sqlalchemy.models.Language`
    :raises ValueError: if the language is missing and the tag is invalid.
    '''
    tag = language_tag if language_tag else 'und'

    existing = session.query(LanguageModel).get(tag)
    if existing:
        return existing

    if not tags.check(tag):
        raise ValueError(
            'Unable to import provider. Invalid language tag: %s' % tag)

    name = ', '.join(tags.description(tag))
    created = LanguageModel(id=tag, name=name)
    session.add(created)
    return created
示例#21
0
def label_lang_rule(errors, node, languages_manager, labels):
    """
    Check that the languages of the labels are valid IANA language tags.

    An invalid tag is reported by appending a ``colander.Invalid`` to
    ``errors``. A valid tag that ``languages_manager`` does not count yet is
    saved through it.
    """
    for label in labels:
        language_tag = label['language']

        if not tags.check(language_tag):
            labels_node = node['labels']
            messages = [
                problem.message for problem in tags.tag(language_tag).errors
            ]
            errors.append(
                colander.Invalid(
                    labels_node,
                    'Invalid language tag: %s' % ", ".join(messages)))
            continue

        if languages_manager.count_languages(language_tag):
            continue
        # Unknown so far: store it with its joined descriptions as name.
        languages_manager.save(
            Language(id=language_tag,
                     name=', '.join(tags.description(language_tag))))
示例#22
0
 def _validate_language(value):
     """Return ``False`` for non-strings, else ``language_tags.check(value)``."""
     if not isinstance(value, six.string_types):
         return False
     return language_tags.check(value)
示例#23
0
 def test_checks(self):
     # 'en' is expected to be accepted by the tag checker.
     accepted = tags.check('en')
     self.assertTrue(accepted)
示例#24
0
 def check_lang_tag(self, tag):
     """Emit the language-tag warning when ``tags.check`` rejects ``tag``."""
     if tags.check(tag):
         return
     self.print_language_tag_warning(tag)
示例#25
0
 def check_language_tag(self, key, value, metadata):
     """Drop an invalid language entry from ``metadata`` and warn about it.

     Only the keys ``'lang'`` and ``'@language'`` are inspected; any other
     key leaves ``metadata`` untouched.
     """
     if key not in ('lang', '@language'):
         return
     if tags.check(value):
         return
     del metadata[key]
     self.print_language_tag_warning(value)
示例#26
0
def is_valid_ietf_language(language):
    """Return the result of ``tags.check`` for ``language``."""
    verdict = tags.check(language)
    return verdict
示例#27
0
 def test_checks(self):
     # The plain 'en' tag must pass the check.
     result = tags.check('en')
     self.assertTrue(result)