Example #1
mdf_lmf.update({
    # dialx : dialect BO / PA / GO / GO(s) / GO(n) + WEM / WE => OK
    "dialx" : lambda dialx, lexical_entry: lexical_entry.set_usage_note(dialx.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE"), language="nua"),
    # empr : borrowing => OK
    "empr"  : lambda empr, lexical_entry: set_bw(empr, lexical_entry),
    # sc : scientific name => OK
    "sc"    : lambda sc, lexical_entry: lexical_entry.set_scientific_name(force_caps(sc)),
    # ge : French gloss
    "ge"    : lambda ge, lexical_entry: lexical_entry.set_gloss(force_caps(ge.replace('_', ' ').replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE")), language=config.xml.French),
    # xn : French example
    "xn"    : lambda xn, lexical_entry: lexical_entry.add_example(force_caps(xn), language=config.xml.French),
    # xe : English example
    "xe"    : lambda xe, lexical_entry: lexical_entry.add_example(force_caps(xe), language=config.xml.English),
    # sge : French gloss of the subentry
    "sge"   : lambda sge, lexical_entry: lexical_entry.set_gloss(force_caps(sge), language=config.xml.French),
    # de : French definition
    "de"    : lambda de, lexical_entry: lexical_entry.set_definition(force_caps(de), language=config.xml.French),
    # gr : grammatical note => [Note grammaticale : ] appended after [Note : ]
    "gr"    : lambda gr, lexical_entry: lexical_entry.set_note(gr, type="grammar", language=config.xml.regional),
    # gt : French translation of gr => [Note grammaticale : 'gr' (bold) 'gt' (not bold)]
    "gt"    : lambda gt, lexical_entry: lexical_entry.set_note(force_caps(gt), type="grammar", language=config.xml.French),
    # ce : French translation of cf => cf : 'cf' (bold) 'ce' (not bold)
    "ce"    : lambda ce, lexical_entry: set_ce(force_caps(ce), lexical_entry),
    # nt : note => OK
    "nt"    : lambda nt, lexical_entry: lexical_entry.set_note(nt, type="general"),
    # ng : grammatical note => OK
    "ng"    : lambda ng, lexical_entry: lexical_entry.set_note(ng, type="grammar", language=config.xml.vernacular),
    # np : phonological note => OK
    "np"    : lambda np, lexical_entry: lexical_entry.set_note(np, type="phonology"),
    # na : anthropological note => OK
    "na"    : lambda na, lexical_entry: lexical_entry.set_note(na, type="anthropology"),
    # ve : dialect(s) of variant BO / PA / GO / GO(s) / GO(n) + WEM / WE / vx / BO [BM] / BO (Corne) / BO (Corne, BM)
    "ve"    : lambda ve, lexical_entry: lexical_entry.set_dialect(ve.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE")),
    # xv : vernacular example => OK
    "xv"    : lambda xv, lexical_entry: lexical_entry.create_and_add_example(xv.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE"), language=config.xml.vernacular),
    # cf : confer => OK
    "cf"    : lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(cf.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE"), "simple link")
})
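
# The replace chains above all perform the same dialect-abbreviation
# normalisation. A minimal self-contained sketch of it on a plain string
# (helper name and sample value are illustrative, not part of pylmflib):
def normalize_dialect_codes(text):
    """Rewrite GO(s), GO(n) and WEM into the GOs, GOn and WE spellings used above."""
    return text.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE")

assert normalize_dialect_codes("GO(s) / GO(n) / WEM") == "GOs / GOn / WE"
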
Example #2
## To define languages and fonts
import config
FRENCH = "French"

items = lambda lexical_entry: lexical_entry.get_lexeme().replace('{', '').replace('}', '')

## Functions to process some MDF fields (input)
def remove_char(value):
    """Function to remove '_', '^', '$', '&' character at the beginning of 'lx', 'se', 'a', 'xv', 'cf' MDF fields.
    """
    return value.lstrip('_^$&')
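
# Quick self-check of remove_char on made-up values: only the leading run of
# '_', '^', '$', '&' characters is stripped, occurrences inside the value stay.
assert remove_char("$_bwaa") == "bwaa"
assert remove_char("na^ca") == "na^ca"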

mdf_lmf.update({
    "lx"    : lambda lx, lexical_entry: lexical_entry.set_lexeme(remove_char(lx)),
    "a"     : lambda a, lexical_entry: lexical_entry.set_variant_form(remove_char(a), type="phonetics"),
    "se"    : lambda se, lexical_entry: lexical_entry.create_and_add_related_form(remove_char(se), mdf_semanticRelation["se"]),
    "xv"    : lambda xv, lexical_entry: lexical_entry.create_and_add_example(remove_char(xv), language=config.xml.vernacular),
    "cf"    : lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(remove_char(cf), mdf_semanticRelation["cf"])
})

## Functions to process some MDF fields (output)
def process_audio(lexical_entry):
    sf = []
    for form_representation in lexical_entry.get_form_representations():
        if form_representation.get_audio() is not None and form_representation.get_audio().get_fileName() is not None:
            sf.append(form_representation.get_audio().get_fileName())
    return sf

lmf_mdf.update({
    "sf" : lambda lexical_entry: process_audio(lexical_entry)
})
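
# Output-side sketch (assumption: the MDF writer looks up each marker in
# lmf_mdf and emits one \sf line per file name returned by the handler):
#
#   for file_name in lmf_mdf["sf"](lexical_entry):
#       print "\\sf " + file_name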
Example #3
            ok = True
    if not ok:
        print Warning("Citation form '%s' of lexical entry '%s' is not consistant with generated one." % (nep.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))

def check_se(lexical_entry, se_tmp):
    import os
    ok = False
    for form in lexical_entry.find_related_forms(mdf_semanticRelation["se"]):
        if form == se_tmp:
            ok = True
    if not ok:
        print Warning("Subentry '%s' generated for lexical entry '%s' is not consistant." % (se_tmp.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))

mdf_lmf.update({
    "nep"       : lambda nep, lexical_entry: check_nep(lexical_entry, nep), # infinitive in devanagari => check that it corresponds to 'lc_dev' value
    # Generated markers
    "lx_tmp"    : lambda lx_tmp, lexical_entry : check_lx(lexical_entry, lx_tmp), # root in IPA => check that it corresponds to 'lx' value
    "se_tmp"    : lambda se_tmp, lexical_entry : check_se(lexical_entry, se_tmp) # => check that it corresponds to 'se' value
})
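
# Check sketch (assumption: the reader first fills the entry from the regular
# 'lx'/'lc_dev'/'se' markers, then feeds the generated *_tmp markers through
# this table so they can be compared with what is already stored):
#
#   mdf_lmf["se_tmp"](u"generated sub-entry form", lexical_entry)
#   # prints a Warning if no related form of type mdf_semanticRelation["se"] matches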

## Functions to process some LaTeX fields (output)

def format_lexeme(lexical_entry, font):
    import output.tex as tex
    result = ""
    inf_dev = font[NATIONAL](lexical_entry.get_citation_forms(script_name="devanagari")[0]) # lc_dev
    inf_api = font[VERNACULAR](lexical_entry.get_citation_forms(script_name="ipa")[0]) # lc
    root_api = font[VERNACULAR](lexical_entry.get_lexeme()) # lx
    if lexical_entry.is_subentry():
        result += "\\subparagraph{\\dollar\\blacksquare\\dollar "
    else:
        result += "\\vspace{0.5cm} \\paragraph{\\hspace{-0.5cm} "
Example #4
def config_read(filename):
    """! @brief Read an XML file giving the user configuration.
    @param filename The name of the XML file to read with full path, for instance 'pylmflib/pylmflib/config/default/config.xml'.
    @return A Lexical Resource.
    """
    import os
    import config.xml
    configuration = parse_xml(filename)
    # Parse XML elements
    for format in configuration:
        if format.tag == "Language":
            # XML element "Language" have several XML subelements "lang"
            for lang in format:
                # XML elements "lang" have 2 XML attributes: one for the nature of the language ("att"), a second for the language code ("val")
                exec("config.xml." + lang.attrib["att"] + " = '" + lang.attrib["val"] + "'")
        elif format.tag == "Font":
            config.xml.font = dict()
            # XML element "Font" have several XML subelements "font"
            for font in format:
                # XML elements "font" have 2 XML attributes: one for the nature of the language ("att"), a second for the variable name ("var")
                exec("l = lambda " + font.attrib['var'] + ": " + font.text)
                config.xml.font.update({font.attrib['att']: l})
        elif format.tag == "LMF":
            # Create lexical resource and set DTD version
            lexical_resource = LexicalResource(format[0].attrib["dtdVersion"])
            for object in format[0]:
                if object.tag == "GlobalInformation":
                    # Set global information
                    for feat in object:
                        if feat.attrib["att"] == "languageCode":
                            lexical_resource.set_language_code(feat.attrib["val"])
                        elif feat.attrib["att"] == "author":
                            lexical_resource.set_author(feat.attrib["val"])
                        elif feat.attrib["att"] == "version":
                            lexical_resource.set_version(feat.attrib["val"])
                        elif feat.attrib["att"] == "lastUpdate":
                            lexical_resource.set_last_update(feat.attrib["val"])
                        elif feat.attrib["att"] == "license":
                            lexical_resource.set_license(feat.attrib["val"])
                        elif feat.attrib["att"] == "characterEncoding":
                            lexical_resource.set_character_encoding(feat.attrib["val"])
                        elif feat.attrib["att"] == "dateCoding":
                            lexical_resource.set_date_coding(feat.attrib["val"])
                        elif feat.attrib["att"] == "creationDate":
                            lexical_resource.set_creation_date(feat.attrib["val"])
                        elif feat.attrib["att"] == "projectName":
                            lexical_resource.set_project_name(feat.attrib["val"])
                        elif feat.attrib["att"] == "description":
                            lexical_resource.set_description(feat.attrib["val"])
                elif object.tag == "Lexicon":
                    # Create lexicon and set identifier
                    lexicon = Lexicon(object.attrib["id"])
                    # Set lexicon attributes
                    for feat in object:
                        if feat.attrib["att"] == "language":
                            lexicon.set_language(feat.attrib["val"])
                        elif feat.attrib["att"] == "languageScript":
                            lexicon.set_languageScript(feat.attrib["val"])
                        elif feat.attrib["att"] == "label":
                            lexicon.set_label(feat.attrib["val"])
                        elif feat.attrib["att"] == "lexiconType":
                            lexicon.set_lexiconType(feat.attrib["val"])
                        elif feat.attrib["att"] == "entrySource":
                            lexicon.set_entrySource(feat.attrib["val"])
                        elif feat.attrib["att"] == "localPath":
                            lexicon.set_localPath(feat.attrib["val"])
                            # Set absolute path to audio files
                            config.xml.audio_path = os.path.abspath(os.path.abspath('.') + "/" + feat.attrib["val"]) + "/"
                    # Attach lexicon to the lexical resource
                    lexical_resource.add_lexicon(lexicon)
        elif format.tag == "MDF":
            for mdf in format:
                if mdf.tag == "mdf_lmf":
                    # XML elements "mdf_lmf" have 2 XML attributes: one for the name of the marker ("marker"), a second for the variable name ("var")
                    exec("l = lambda " + mdf.attrib['var'] + ": " + mdf.text)
                    mdf_lmf.update({mdf.attrib['marker']: l})
                elif mdf.tag == "ps_partOfSpeech":
                    # XML elements "ps_partOfSpeech" have 2 XML attributes: one for the MDF value ("ps"), a second for the LMF value ("partOfSpeech")
                    ps_partOfSpeech.update({mdf.attrib['ps']: mdf.attrib['partOfSpeech']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(mdf.attrib['partOfSpeech'])
                    # And automatically update the reverse operation
                    partOfSpeech_tex.update({mdf.attrib['partOfSpeech']: mdf.attrib['ps']})
                elif mdf.tag == "pdl_paradigmLabel":
                    # XML elements "pdl_paradigmLabel" have 2 XML attributes: one for the MDF value ("pdl"), a second for the LMF value ("paradigmLabel")
                    pdl_paradigmLabel.update({mdf.attrib['pdl']: mdf.attrib['paradigmLabel']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(mdf.attrib['paradigmLabel'])
                    # And automatically update the reverse operation
                    paradigmLabel_tex.update({mdf.attrib['paradigmLabel']: mdf.attrib['pdl']})
                elif mdf.tag == "lmf_mdf":
                    # XML elements "lmf_mdf" have 2 XML attributes: one for the name of the marker ("marker"), a second for the variable name ("var")
                    exec("l = lambda " + mdf.attrib['var'] + ": " + mdf.text)
                    lmf_mdf.update({mdf.attrib['marker']: l})
                elif mdf.tag == "mdf_order":
                    mdf_order = []
                    for element in mdf:
                        mdf_order.append(element.tag)
                        list1 = []
                        for level1 in element:
                            list1.append(level1.tag)
                            list2 = []
                            for level2 in level1:
                                list2.append(level2.tag)
                            if len(list2) != 0:
                                list1.append(list2)
                        if len(list1) != 0:
                            mdf_order.append(list1)
        elif format.tag == "LaTeX":
            for param in format:
                if param.tag == "partOfSpeech_tex":
                    # XML elements "partOfSpeech_tex" have 2 or 3 XML attributes: one for the LMF value ("partOfSpeech"), a second for the LaTeX value ("tex"), and an optional one to define language
                    try:
                        partOfSpeech_tex.update({(param.attrib['lang'], param.attrib['partOfSpeech']): param.attrib['tex']})
                    except KeyError:
                        partOfSpeech_tex.update({param.attrib['partOfSpeech']: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(param.attrib['partOfSpeech'])
                elif param.tag == "paradigmLabel_tex":
                    # XML elements "paradigmLabel_tex" have 2 XML attributes: one for the LMF value ("paradigmLabel"), a second for the LaTeX value ("tex")
                    paradigmLabel_tex.update({param.attrib['paradigmLabel']: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(param.attrib['paradigmLabel'])
        else:
            raise InputError(module_name + ".py", "XML file '%s' is not well-formatted." % filename)
    return lexical_resource
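
# Shape of the configuration file this function expects, inferred from the
# branches above. Element and attribute *names* are the ones the parser reads;
# the root element name, the tags used inside <LMF>, and every value are
# placeholders only:
#
#   <root>
#     <Language> <lang att="vernacular" val="..."/> ... </Language>
#     <Font>     <font att="vernacular" var="text">...</font> ... </Font>
#     <LMF>
#       <resource dtdVersion="...">
#         <GlobalInformation> <feat att="author" val="..."/> ... </GlobalInformation>
#         <Lexicon id="...">  <feat att="language" val="..."/> ... </Lexicon>
#       </resource>
#     </LMF>
#     <MDF>   <mdf_lmf marker="..." var="...">...</mdf_lmf> ... </MDF>
#     <LaTeX> <partOfSpeech_tex partOfSpeech="..." tex="..."/> ... </LaTeX>
#   </root>
#
# Typical call, using the path given in the docstring:
#   lexical_resource = config_read("pylmflib/pylmflib/config/default/config.xml")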
Example #5
        if form == se_tmp:
            ok = True
    if not ok:
        print Warning(
            "Subentry '%s' generated for lexical entry '%s' is not consistant."
            % (se_tmp.encode(ENCODING),
               lexical_entry.get_lexeme().encode(ENCODING)))


mdf_lmf.update({
    "nep":
    lambda nep, lexical_entry: check_nep(
        lexical_entry, nep
    ),  # infinitive in devanagari => check that it corresponds to 'lc_dev' value
    # Generated markers
    "lx_tmp":
    lambda lx_tmp, lexical_entry:
    check_lx(lexical_entry, lx_tmp
             ),  # root in IPA => check that it corresponds to 'lx' value
    "se_tmp":
    lambda se_tmp, lexical_entry: check_se(
        lexical_entry, se_tmp)  # => check that it corresponds to 'se' value
})

## Functions to process some LaTeX fields (output)


def format_lexeme(lexical_entry, font):
    import output.tex as tex
    result = ""
    inf_dev = font[NATIONAL](lexical_entry.get_citation_forms(
        script_name="devanagari")[0])  # lc_dev
Example #6
## Functions to process some MDF fields (input)
def remove_char(value):
    """Function to remove '_', '^', '$', '&' character at the beginning of 'lx', 'se', 'a', 'xv', 'cf' MDF fields.
    """
    return value.lstrip('_^$&')


mdf_lmf.update({
    "lx":
    lambda lx, lexical_entry: lexical_entry.set_lexeme(remove_char(lx)),
    "a":
    lambda a, lexical_entry: lexical_entry.set_variant_form(remove_char(a),
                                                            type="phonetics"),
    "se":
    lambda se, lexical_entry: lexical_entry.create_and_add_related_form(
        remove_char(se), mdf_semanticRelation["se"]),
    "xv":
    lambda xv, lexical_entry: lexical_entry.create_and_add_example(
        remove_char(xv), language=config.xml.vernacular),
    "cf":
    lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(
        remove_char(cf), mdf_semanticRelation["cf"])
})


## Functions to process some MDF fields (output)
def process_audio(lexical_entry):
    sf = []
    for form_representation in lexical_entry.get_form_representations():
        if form_representation.get_audio(
        ) is not None and form_representation.get_audio().get_fileName(
Example #7
mdf_lmf.update({
    # dialx : dialect BO / PA / GO / GO(s) / GO(n) + WEM / WE => OK
    "dialx":
    lambda dialx, lexical_entry: lexical_entry.set_usage_note(dialx.replace(
        "GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE"),
                                                              language="nua"),
    # empr : borrowing => OK
    "empr":
    lambda empr, lexical_entry: set_bw(empr, lexical_entry),
    # sc : scientific name => OK
    "sc":
    lambda sc, lexical_entry: lexical_entry.set_scientific_name(force_caps(sc)
                                                                ),
    # ge : French gloss
    "ge":
    lambda ge, lexical_entry: lexical_entry.set_gloss(force_caps(
        ge.replace('_', ' ').replace("GO(s)", "GOs").replace("GO(n)", "GOn").
        replace("WEM", "WE")),
                                                      language=config.xml.
                                                      French),
    # xn : French example
    "xn":
    lambda xn, lexical_entry: lexical_entry.add_example(
        force_caps(xn), language=config.xml.French),
    # xe : English example
    "xe":
    lambda xe, lexical_entry: lexical_entry.add_example(
        force_caps(xe), language=config.xml.English),
    # sge : French gloss of the subentry
    "sge":
    lambda sge, lexical_entry: lexical_entry.set_gloss(
        force_caps(sge), language=config.xml.French),
    # de : French definition
    "de":
    lambda de, lexical_entry: lexical_entry.set_definition(
        force_caps(de), language=config.xml.French),
    # gr : grammatical note => [Note grammaticale : ] appended after [Note : ]
    "gr":
    lambda gr, lexical_entry: lexical_entry.set_note(
        gr, type="grammar", language=config.xml.regional),
    # gt : French translation of gr => [Note grammaticale : 'gr' (bold) 'gt' (not bold)]
    "gt":
    lambda gt, lexical_entry: lexical_entry.set_note(
        force_caps(gt), type="grammar", language=config.xml.French),
    # ce : French translation of cf => cf : 'cf' (bold) 'ce' (not bold)
    "ce":
    lambda ce, lexical_entry: set_ce(force_caps(ce), lexical_entry),
    # nt : note => OK
    "nt":
    lambda nt, lexical_entry: lexical_entry.set_note(nt, type="general"),
    # ng : grammatical note => OK
    "ng":
    lambda ng, lexical_entry: lexical_entry.set_note(
        ng, type="grammar", language=config.xml.vernacular),
    # np : phonological note => OK
    "np":
    lambda np, lexical_entry: lexical_entry.set_note(np, type="phonology"),
    # na : anthropological note => OK
    "na":
    lambda na, lexical_entry: lexical_entry.set_note(na, type="anthropology"),
    # ve : dialect(s) of variant BO / PA / GO / GO(s) / GO(n) + WEM / WE / vx / BO [BM] / BO (Corne) / BO (Corne, BM)
    "ve":
    lambda ve, lexical_entry: lexical_entry.set_dialect(
        ve.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace(
            "WEM", "WE")),
    # xv : vernacular example => OK
    "xv":
    lambda xv, lexical_entry:
    lexical_entry.create_and_add_example(xv.replace("GO(s)", "GOs").replace(
        "GO(n)", "GOn").replace("WEM", "WE"),
                                         language=config.xml.vernacular),
    # cf : confer => OK
    "cf":
    lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(
        cf.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace(
            "WEM", "WE"), "simple link")
})
Example #8
    final_mark = set(['.', '!', '?', u"\u3002"])
    if text[-1] not in final_mark:
        if language == config.xml.English or language == config.xml.French:
            text += '.'
        elif language == config.xml.national or language == config.xml.regional:
            text += u"\u3002"
    return text
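
# Behaviour of the final-punctuation rule above, with the signature used in the
# handlers below, add_final(text, language); the sample strings are made up:
#   add_final(u"a black dog", language=config.xml.French) -> u"a black dog."
#   add_final(some_text, language=config.xml.national)    -> some_text + u"\u3002"
#   text already ending in '.', '!', '?' or U+3002 is returned unchanged.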

mdf_lmf.update({
    "__lx"  : lambda attributes, lx, lexical_entry: process_lx(attributes, lx, lexical_entry),
    "__se"  : lambda attributes, se, lexical_entry: lexical_entry.create_and_add_related_form(se, mdf_semanticRelation["se"]),
    "__nt"  : lambda attributes, nt, lexical_entry: process_nt(attributes, nt, lexical_entry),
    "__np"  : lambda attributes, np, lexical_entry: process_np(attributes, np, lexical_entry),
    "__ec"  : lambda attributes, ec, lexical_entry: process_ec(attributes, ec, lexical_entry),
    "__sd"  : lambda attributes, sd, lexical_entry: process_sd(attributes, sd, lexical_entry),
    "__cf"  : lambda attributes, cf, lexical_entry: process_cf(attributes, cf, lexical_entry),
    # Force first character of definitions to be in upper case
    "dv"    : lambda dv, lexical_entry: lexical_entry.set_definition(force_caps(dv), language=config.xml.vernacular),
    "de"    : lambda de, lexical_entry: lexical_entry.set_definition(add_final(force_caps(de), language=config.xml.English), language=config.xml.English),
    "dn"    : lambda dn, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dn), language=config.xml.national), language=config.xml.national),
    "dr"    : lambda dr, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dr), language=config.xml.regional), language=config.xml.regional),
    "df"    : lambda df, lexical_entry: lexical_entry.set_definition(add_final(force_caps(df), language=config.xml.French), language=config.xml.French)
})
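
# Markers prefixed with "__" receive an extra first argument (the attributes
# parsed along with the marker) before the field value; dispatch sketch, with
# made-up values:
#   mdf_lmf["__se"](attributes, u"sub-entry form", lexical_entry)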

## Functions to process some MDF fields (output)

def get_ec(lexical_entry):
    ec = lexical_entry.get_etymology_comment()
    if lexical_entry.get_term_source_language() is not None:
        ec = "<lang=\"" + lexical_entry.get_term_source_language() + "\">" + " " + ec
    return ec
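
# For instance, an entry whose etymology comment is u"from kerung" and whose
# term source language is "new" comes back as u'<lang="new"> from kerung'
# (both values made up).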
Example #9
mdf_lmf.update({
    "__lx":
    lambda attributes, lx, lexical_entry: process_lx(attributes, lx,
                                                     lexical_entry),
    "__se":
    lambda attributes, se, lexical_entry: lexical_entry.
    create_and_add_related_form(se, mdf_semanticRelation["se"]),
    "__nt":
    lambda attributes, nt, lexical_entry: process_nt(attributes, nt,
                                                     lexical_entry),
    "__np":
    lambda attributes, np, lexical_entry: process_np(attributes, np,
                                                     lexical_entry),
    "__ec":
    lambda attributes, ec, lexical_entry: process_ec(attributes, ec,
                                                     lexical_entry),
    "__sd":
    lambda attributes, sd, lexical_entry: process_sd(attributes, sd,
                                                     lexical_entry),
    "__cf":
    lambda attributes, cf, lexical_entry: process_cf(attributes, cf,
                                                     lexical_entry),
    # Force first character of definitions to be in upper case
    "dv":
    lambda dv, lexical_entry: lexical_entry.set_definition(
        force_caps(dv), language=config.xml.vernacular),
    "de":
    lambda de, lexical_entry: lexical_entry.set_definition(
        add_final(force_caps(de), language=config.xml.English),
        language=config.xml.English),
    "dn":
    lambda dn, lexical_entry: lexical_entry.set_definition(
        add_final(force_caps(dn), language=config.xml.national),
        language=config.xml.national),
    "dr":
    lambda dr, lexical_entry: lexical_entry.set_definition(
        add_final(force_caps(dr), language=config.xml.regional),
        language=config.xml.regional),
    "df":
    lambda df, lexical_entry: lexical_entry.set_definition(
        add_final(force_caps(df), language=config.xml.French),
        language=config.xml.French)
})
Example #10
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from config.mdf import mdf_lmf, lmf_mdf

## To define languages and fonts
import config

items = lambda lexical_entry: lexical_entry.get_lexeme()

## Functions to process some MDF fields (input)
mdf_lmf.update({})
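
# A project-specific configuration would normally fill this table with marker
# handlers, e.g. (mirroring the "lx" handler shown in the other examples;
# illustrative only):
#
#   mdf_lmf.update({
#       "lx": lambda lx, lexical_entry: lexical_entry.set_lexeme(lx)
#   })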

## Functions to process some MDF fields (output)
lmf_mdf.update({})

## Functions to process some LaTeX fields (output)

## Function giving order in which information must be written in LaTeX and mapping between LMF representation and LaTeX (output)