def _normalize_dialects(text):
    """Return *text* with dialect codes rewritten to their canonical form.

    'GO(s)' -> 'GOs', 'GO(n)' -> 'GOn', 'WEM' -> 'WE'.
    Factors out the replace chain that was repeated in five handlers below.
    """
    return text.replace("GO(s)", "GOs").replace("GO(n)", "GOn").replace("WEM", "WE")

# Mapping from MDF markers to LMF update operations (input side).
# Each handler receives the raw MDF field value and the LexicalEntry to fill.
mdf_lmf.update({
    # dialx : dialect BO / PA / GO / GO(s) / GO(n) + WEM / WE => OK
    "dialx" : lambda dialx, lexical_entry: lexical_entry.set_usage_note(_normalize_dialects(dialx), language="nua"),
    # empr : borrowing (emprunt) => OK
    "empr" : lambda empr, lexical_entry: set_bw(empr, lexical_entry),
    # sc : scientific name => OK
    "sc" : lambda sc, lexical_entry: lexical_entry.set_scientific_name(force_caps(sc)),
    # ge : French gloss; MDF uses '_' where a space is meant
    "ge" : lambda ge, lexical_entry: lexical_entry.set_gloss(force_caps(_normalize_dialects(ge.replace('_', ' '))), language=config.xml.French),
    # xn : French example
    "xn" : lambda xn, lexical_entry: lexical_entry.add_example(force_caps(xn), language=config.xml.French),
    # xe : English example
    "xe" : lambda xe, lexical_entry: lexical_entry.add_example(force_caps(xe), language=config.xml.English),
    # sge : French gloss of the subentry
    "sge" : lambda sge, lexical_entry: lexical_entry.set_gloss(force_caps(sge), language=config.xml.French),
    # de : French definition
    "de" : lambda de, lexical_entry: lexical_entry.set_definition(force_caps(de), language=config.xml.French),
    # gr : grammatical note => rendered as [Note grammaticale : ] following [Note : ]
    "gr" : lambda gr, lexical_entry: lexical_entry.set_note(gr, type="grammar", language=config.xml.regional),
    # gt : French translation of 'gr' => [Note grammaticale : 'gr' (bold) 'gt' (plain)]
    "gt" : lambda gt, lexical_entry: lexical_entry.set_note(force_caps(gt), type="grammar", language=config.xml.French),
    # ce : French translation of cf => cf : 'cf' (bold) 'ce' (plain)
    "ce" : lambda ce, lexical_entry: set_ce(force_caps(ce), lexical_entry),
    # nt : general note => OK
    "nt" : lambda nt, lexical_entry: lexical_entry.set_note(nt, type="general"),
    # ng : grammatical note => OK
    "ng" : lambda ng, lexical_entry: lexical_entry.set_note(ng, type="grammar", language=config.xml.vernacular),
    # np : phonological note => OK
    "np" : lambda np, lexical_entry: lexical_entry.set_note(np, type="phonology"),
    # na : anthropological note => OK
    "na" : lambda na, lexical_entry: lexical_entry.set_note(na, type="anthropology"),
    # ve : dialect(s) of variant BO / PA / GO / GO(s) / GO(n) + WEM / WE / vx / BO [BM] / BO (Corne) / BO (Corne, BM)
    "ve" : lambda ve, lexical_entry: lexical_entry.set_dialect(_normalize_dialects(ve)),
    # xv : vernacular example => OK
    "xv" : lambda xv, lexical_entry: lexical_entry.create_and_add_example(_normalize_dialects(xv), language=config.xml.vernacular),
    # cf : confer => OK
    "cf" : lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(_normalize_dialects(cf), "simple link")
})
## To define languages and fonts
import config

FRENCH = "French"


def items(lexical_entry):
    """Return the lexeme of *lexical_entry* with '{' and '}' stripped out.

    Replaces the former ``items = lambda ...`` assignment (PEP 8 E731);
    name and behaviour are unchanged.
    """
    return lexical_entry.get_lexeme().replace('{', '').replace('}', '')


## Functions to process some MDF fields (input)

def remove_char(value):
    """Remove leading '_', '^', '$', '&' characters from the
    'lx', 'se', 'a', 'xv', 'cf' MDF fields.
    """
    return value.lstrip('_^$&')


# Markers whose values must be stripped of leading sort/formatting characters.
mdf_lmf.update({
    "lx" : lambda lx, lexical_entry: lexical_entry.set_lexeme(remove_char(lx)),
    "a" : lambda a, lexical_entry: lexical_entry.set_variant_form(remove_char(a), type="phonetics"),
    "se" : lambda se, lexical_entry: lexical_entry.create_and_add_related_form(remove_char(se), mdf_semanticRelation["se"]),
    "xv" : lambda xv, lexical_entry: lexical_entry.create_and_add_example(remove_char(xv), language=config.xml.vernacular),
    "cf" : lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(remove_char(cf), mdf_semanticRelation["cf"])
})


## Functions to process some MDF fields (output)

def process_audio(lexical_entry):
    """Collect the audio file names attached to *lexical_entry*.

    @return A list of file names (possibly empty).
    """
    sf = []
    for form_representation in lexical_entry.get_form_representations():
        # Hoisted: get_audio() was previously called twice per iteration
        audio = form_representation.get_audio()
        if audio is not None and audio.get_fileName() is not None:
            sf.append(audio.get_fileName())
    return sf


lmf_mdf.update({
    # A bare function reference behaves exactly like the former
    # one-argument pass-through lambda
    "sf" : process_audio
})
# NOTE(review): fragment — the first statements below are the tail of a
# check_nep-style consistency check whose 'def' line lies outside this chunk;
# the indentation is reconstructed and should be confirmed against the full file.
            ok = True
    if not ok:
        # Consistency failure is reported (Python 2 print statement), not raised
        print Warning("Citation form '%s' of lexical entry '%s' is not consistant with generated one." % (nep.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))

def check_se(lexical_entry, se_tmp):
    """Check that the generated subentry *se_tmp* matches one of the related
    forms already attached to *lexical_entry*; print a Warning otherwise.
    """
    import os
    ok = False
    for form in lexical_entry.find_related_forms(mdf_semanticRelation["se"]):
        if form == se_tmp:
            ok = True
    if not ok:
        # Consistency failure is reported (Python 2 print statement), not raised
        print Warning("Subentry '%s' generated for lexical entry '%s' is not consistant." % (se_tmp.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))

# Handlers for generated consistency-check markers.
mdf_lmf.update({
    "nep" : lambda nep, lexical_entry: check_nep(lexical_entry, nep), # infinitive in devanagari => check that it corresponds to 'lc_dev' value
    # Generated markers
    "lx_tmp" : lambda lx_tmp, lexical_entry : check_lx(lexical_entry, lx_tmp), # root in IPA => check that it corresponds to 'lx' value
    "se_tmp" : lambda se_tmp, lexical_entry : check_se(lexical_entry, se_tmp) # => check that it corresponds to 'se' value
})

## Functions to process some LaTeX fields (output)
def format_lexeme(lexical_entry, font):
    """Format the lexeme of *lexical_entry* for LaTeX output using the *font*
    lambda table.

    NOTE(review): this definition continues past the chunk boundary; only its
    opening statements are visible here.
    """
    import output.tex as tex
    result = ""
    inf_dev = font[NATIONAL](lexical_entry.get_citation_forms(script_name="devanagari")[0]) # lc_dev
    inf_api = font[VERNACULAR](lexical_entry.get_citation_forms(script_name="ipa")[0]) # lc
    root_api = font[VERNACULAR](lexical_entry.get_lexeme()) # lx
    if lexical_entry.is_subentry():
        result += "\\subparagraph{\\dollar\\blacksquare\\dollar "
    else:
        result += "\\vspace{0.5cm} \\paragraph{\\hspace{-0.5cm} "
def config_read(filename):
    """! @brief Read an XML file giving the user configuration.
    @param filename The name of the XML file to read with full path, for instance 'pylmflib/pylmflib/config/default/config.xml'.
    @return A Lexical Resource.
    """
    import os
    import config.xml
    configuration = parse_xml(filename)
    # Parse XML elements. 'section'/'node' replace the previous loop names
    # 'format'/'object', which shadowed builtins.
    for section in configuration:
        if section.tag == "Language":
            # "Language" has several "lang" subelements, each with 2 attributes:
            # the nature of the language ("att") and the language code ("val")
            for lang in section:
                # setattr() instead of exec'ing a concatenated source string:
                # immune to quotes in the value and works under Python 3, where
                # exec cannot rebind names reliably
                setattr(config.xml, lang.attrib["att"], lang.attrib["val"])
        elif section.tag == "Font":
            config.xml.font = dict()
            # "Font" has several "font" subelements, each with the nature of the
            # language ("att") and the lambda variable name ("var")
            for font in section:
                # eval() of a lambda expression instead of exec'ing an assignment:
                # exec cannot rebind function locals in Python 3.
                # NOTE(review): the configuration file is treated as trusted input.
                handler = eval("lambda " + font.attrib['var'] + ": " + font.text)
                config.xml.font.update({font.attrib['att']: handler})
        elif section.tag == "LMF":
            # Create lexical resource and set DTD version
            lexical_resource = LexicalResource(section[0].attrib["dtdVersion"])
            for node in section[0]:
                if node.tag == "GlobalInformation":
                    # Set global information
                    for feat in node:
                        if feat.attrib["att"] == "languageCode":
                            lexical_resource.set_language_code(feat.attrib["val"])
                        elif feat.attrib["att"] == "author":
                            lexical_resource.set_author(feat.attrib["val"])
                        elif feat.attrib["att"] == "version":
                            lexical_resource.set_version(feat.attrib["val"])
                        elif feat.attrib["att"] == "lastUpdate":
                            lexical_resource.set_last_update(feat.attrib["val"])
                        elif feat.attrib["att"] == "license":
                            lexical_resource.set_license(feat.attrib["val"])
                        elif feat.attrib["att"] == "characterEncoding":
                            lexical_resource.set_character_encoding(feat.attrib["val"])
                        elif feat.attrib["att"] == "dateCoding":
                            lexical_resource.set_date_coding(feat.attrib["val"])
                        elif feat.attrib["att"] == "creationDate":
                            lexical_resource.set_creation_date(feat.attrib["val"])
                        elif feat.attrib["att"] == "projectName":
                            lexical_resource.set_project_name(feat.attrib["val"])
                        elif feat.attrib["att"] == "description":
                            lexical_resource.set_description(feat.attrib["val"])
                elif node.tag == "Lexicon":
                    # Create lexicon and set identifier
                    lexicon = Lexicon(node.attrib["id"])
                    # Set lexicon attributes
                    for feat in node:
                        if feat.attrib["att"] == "language":
                            lexicon.set_language(feat.attrib["val"])
                        elif feat.attrib["att"] == "languageScript":
                            lexicon.set_languageScript(feat.attrib["val"])
                        elif feat.attrib["att"] == "label":
                            lexicon.set_label(feat.attrib["val"])
                        elif feat.attrib["att"] == "lexiconType":
                            lexicon.set_lexiconType(feat.attrib["val"])
                        elif feat.attrib["att"] == "entrySource":
                            lexicon.set_entrySource(feat.attrib["val"])
                        elif feat.attrib["att"] == "localPath":
                            lexicon.set_localPath(feat.attrib["val"])
                            # Set absolute path to audio files
                            config.xml.audio_path = os.path.abspath(os.path.abspath('.') + "/" + feat.attrib["val"]) + "/"
                    # Attach lexicon to the lexical resource
                    lexical_resource.add_lexicon(lexicon)
        elif section.tag == "MDF":
            for mdf in section:
                if mdf.tag == "mdf_lmf":
                    # "mdf_lmf" carries the marker name ("marker") and the lambda
                    # variable name ("var"); the body is the element text
                    handler = eval("lambda " + mdf.attrib['var'] + ": " + mdf.text)
                    mdf_lmf.update({mdf.attrib['marker']: handler})
                elif mdf.tag == "ps_partOfSpeech":
                    # Maps an MDF value ("ps") to an LMF value ("partOfSpeech")
                    ps_partOfSpeech.update({mdf.attrib['ps']: mdf.attrib['partOfSpeech']})
                    # Also automatically update the range of values allowed for the
                    # LMF partOfSpeech LexicalEntry attribute
                    partOfSpeech_range.add(mdf.attrib['partOfSpeech'])
                    # And automatically update the reverse operation
                    partOfSpeech_tex.update({mdf.attrib['partOfSpeech']: mdf.attrib['ps']})
                elif mdf.tag == "pdl_paradigmLabel":
                    # Maps an MDF value ("pdl") to an LMF value ("paradigmLabel")
                    pdl_paradigmLabel.update({mdf.attrib['pdl']: mdf.attrib['paradigmLabel']})
                    # Also automatically update the range of values allowed for the
                    # LMF paradigmLabel Paradigm attribute
                    paradigmLabel_range.add(mdf.attrib['paradigmLabel'])
                    # And automatically update the reverse operation
                    paradigmLabel_tex.update({mdf.attrib['paradigmLabel']: mdf.attrib['pdl']})
                elif mdf.tag == "lmf_mdf":
                    # "lmf_mdf" carries the marker name ("marker") and the lambda
                    # variable name ("var")
                    handler = eval("lambda " + mdf.attrib['var'] + ": " + mdf.text)
                    lmf_mdf.update({mdf.attrib['marker']: handler})
                elif mdf.tag == "mdf_order":
                    # NOTE(review): 'mdf_order' is rebuilt into a *local* variable
                    # only — it never leaves this function; confirm whether a
                    # module-level rebind was intended.
                    mdf_order = []
                    for element in mdf:
                        mdf_order.append(element.tag)
                        list1 = []
                        for level1 in element:
                            list1.append(level1.tag)
                            list2 = [level2.tag for level2 in level1]
                            if len(list2) != 0:
                                list1.append(list2)
                        if len(list1) != 0:
                            mdf_order.append(list1)
        elif section.tag == "LaTeX":
            for param in section:
                if param.tag == "partOfSpeech_tex":
                    # 2 or 3 attributes: LMF value ("partOfSpeech"), LaTeX value
                    # ("tex"), and an optional language ("lang")
                    try:
                        partOfSpeech_tex.update({(param.attrib['lang'], param.attrib['partOfSpeech']): param.attrib['tex']})
                    except KeyError:
                        partOfSpeech_tex.update({param.attrib['partOfSpeech']: param.attrib['tex']})
                    # Also automatically update the range of values allowed for the
                    # LMF partOfSpeech LexicalEntry attribute
                    partOfSpeech_range.add(param.attrib['partOfSpeech'])
                elif param.tag == "paradigmLabel_tex":
                    # 2 attributes: LMF value ("paradigmLabel"), LaTeX value ("tex")
                    paradigmLabel_tex.update({param.attrib['paradigmLabel']: param.attrib['tex']})
                    # Also automatically update the range of values allowed for the
                    # LMF paradigmLabel Paradigm attribute
                    paradigmLabel_range.add(param.attrib['paradigmLabel'])
        else:
            raise InputError(module_name + ".py", "XML file '%s' is not well-formatted." % filename)
    return lexical_resource
# NOTE(review): fragment — the first statements below are the tail of a
# check_se-style consistency check whose 'def' line lies outside this chunk;
# the indentation is reconstructed and should be confirmed against the full file.
        if form == se_tmp:
            ok = True
    if not ok:
        # Consistency failure is reported (Python 2 print statement), not raised
        print Warning(
            "Subentry '%s' generated for lexical entry '%s' is not consistant."
            % (se_tmp.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))

# Handlers for generated consistency-check markers.
mdf_lmf.update({
    "nep": lambda nep, lexical_entry: check_nep(
        lexical_entry, nep
    ),  # infinitive in devanagari => check that it corresponds to 'lc_dev' value
    # Generated markers
    "lx_tmp": lambda lx_tmp, lexical_entry: check_lx(lexical_entry, lx_tmp
                                                     ),  # root in IPA => check that it corresponds to 'lx' value
    "se_tmp": lambda se_tmp, lexical_entry: check_se(
        lexical_entry, se_tmp)  # => check that it corresponds to 'se' value
})

## Functions to process some LaTeX fields (output)
def format_lexeme(lexical_entry, font):
    """Format the lexeme of *lexical_entry* for LaTeX output using the *font*
    lambda table.

    NOTE(review): this definition continues past the chunk boundary; only its
    opening statements are visible here.
    """
    import output.tex as tex
    result = ""
    inf_dev = font[NATIONAL](lexical_entry.get_citation_forms(
        script_name="devanagari")[0])  # lc_dev
## Functions to process some MDF fields (input) def remove_char(value): """Function to remove '_', '^', '$', '&' character at the beginning of 'lx', 'se', 'a', 'xv', 'cf' MDF fields. """ return value.lstrip('_^$&') mdf_lmf.update({ "lx": lambda lx, lexical_entry: lexical_entry.set_lexeme(remove_char(lx)), "a": lambda a, lexical_entry: lexical_entry.set_variant_form(remove_char(a), type="phonetics"), "se": lambda se, lexical_entry: lexical_entry.create_and_add_related_form( remove_char(se), mdf_semanticRelation["se"]), "xv": lambda xv, lexical_entry: lexical_entry.create_and_add_example( remove_char(xv), language=config.xml.vernacular), "cf": lambda cf, lexical_entry: lexical_entry.create_and_add_related_form( remove_char(cf), mdf_semanticRelation["cf"]) }) ## Functions to process some MDF fields (output) def process_audio(lexical_entry): sf = [] for form_representation in lexical_entry.get_form_representations(): if form_representation.get_audio( ) is not None and form_representation.get_audio().get_fileName(
def _fix_dialect_codes(text):
    """Return *text* with dialect codes rewritten to their canonical
    spelling: 'GO(s)' -> 'GOs', 'GO(n)' -> 'GOn', 'WEM' -> 'WE'.
    """
    for before, after in (("GO(s)", "GOs"), ("GO(n)", "GOn"), ("WEM", "WE")):
        text = text.replace(before, after)
    return text


# Mapping from MDF markers to LMF update operations (input side); each
# handler is called with the raw MDF field value and the LexicalEntry.
mdf_lmf.update({
    # dialx : dialect BO / PA / GO / GO(s) / GO(n) + WEM / WE => OK
    "dialx": lambda dialx, lexical_entry: lexical_entry.set_usage_note(_fix_dialect_codes(dialx), language="nua"),
    # empr : borrowing => OK
    "empr": lambda empr, lexical_entry: set_bw(empr, lexical_entry),
    # sc : scientific name => OK
    "sc": lambda sc, lexical_entry: lexical_entry.set_scientific_name(force_caps(sc)),
    # ge : French gloss; MDF uses '_' where a space is meant
    "ge": lambda ge, lexical_entry: lexical_entry.set_gloss(force_caps(_fix_dialect_codes(ge.replace('_', ' '))), language=config.xml.French),
    # xn : French example
    "xn": lambda xn, lexical_entry: lexical_entry.add_example(force_caps(xn), language=config.xml.French),
    # xe : English example
    "xe": lambda xe, lexical_entry: lexical_entry.add_example(force_caps(xe), language=config.xml.English),
    # sge : French gloss of the subentry
    "sge": lambda sge, lexical_entry: lexical_entry.set_gloss(force_caps(sge), language=config.xml.French),
    # de : French definition
    "de": lambda de, lexical_entry: lexical_entry.set_definition(force_caps(de), language=config.xml.French),
    # gr : grammatical note => rendered as [Note grammaticale : ] following [Note : ]
    "gr": lambda gr, lexical_entry: lexical_entry.set_note(gr, type="grammar", language=config.xml.regional),
    # gt : French translation of 'gr' => [Note grammaticale : 'gr' (bold) 'gt' (plain)]
    "gt": lambda gt, lexical_entry: lexical_entry.set_note(force_caps(gt), type="grammar", language=config.xml.French),
    # ce : French translation of cf => cf : 'cf' (bold) 'ce' (plain)
    "ce": lambda ce, lexical_entry: set_ce(force_caps(ce), lexical_entry),
    # nt : general note => OK
    "nt": lambda nt, lexical_entry: lexical_entry.set_note(nt, type="general"),
    # ng : grammatical note => OK
    "ng": lambda ng, lexical_entry: lexical_entry.set_note(ng, type="grammar", language=config.xml.vernacular),
    # np : phonological note => OK
    "np": lambda np, lexical_entry: lexical_entry.set_note(np, type="phonology"),
    # na : anthropological note => OK
    "na": lambda na, lexical_entry: lexical_entry.set_note(na, type="anthropology"),
    # ve : dialect(s) of variant BO / PA / GO / GO(s) / GO(n) + WEM / WE / vx / BO [BM] / BO (Corne) / BO (Corne, BM)
    "ve": lambda ve, lexical_entry: lexical_entry.set_dialect(_fix_dialect_codes(ve)),
    # xv : vernacular example => OK
    "xv": lambda xv, lexical_entry: lexical_entry.create_and_add_example(_fix_dialect_codes(xv), language=config.xml.vernacular),
    # cf : confer => OK
    "cf": lambda cf, lexical_entry: lexical_entry.create_and_add_related_form(_fix_dialect_codes(cf), "simple link")
})
# NOTE(review): fragment — the statements below are the tail of an
# add_final-style helper (appends sentence-final punctuation) whose 'def'
# line lies outside this chunk; indentation is reconstructed.
    final_mark = set(['.', '!', '?', u"\u3002"])
    # Append a final mark only when the text does not already end with one
    if text[-1] not in final_mark:
        if language == config.xml.English or language == config.xml.French:
            text += '.'
        elif language == config.xml.national or language == config.xml.regional:
            # u"\u3002" is the ideographic full stop
            text += u"\u3002"
    return text

# "__xx" handlers are attribute-aware: they receive the MDF marker attributes
# as well as the field value and the entry; plain handlers take only the value.
mdf_lmf.update({
    "__lx" : lambda attributes, lx, lexical_entry: process_lx(attributes, lx, lexical_entry),
    "__se" : lambda attributes, se, lexical_entry: lexical_entry.create_and_add_related_form(se, mdf_semanticRelation["se"]),
    "__nt" : lambda attributes, nt, lexical_entry: process_nt(attributes, nt, lexical_entry),
    "__np" : lambda attributes, np, lexical_entry: process_np(attributes, np, lexical_entry),
    "__ec" : lambda attributes, ec, lexical_entry: process_ec(attributes, ec, lexical_entry),
    "__sd" : lambda attributes, sd, lexical_entry: process_sd(attributes, sd, lexical_entry),
    "__cf" : lambda attributes, cf, lexical_entry: process_cf(attributes, cf, lexical_entry),
    # Force first character of definitions to be in upper case
    "dv" : lambda dv, lexical_entry: lexical_entry.set_definition(force_caps(dv), language=config.xml.vernacular),
    "de" : lambda de, lexical_entry: lexical_entry.set_definition(add_final(force_caps(de), language=config.xml.English), language=config.xml.English),
    "dn" : lambda dn, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dn), language=config.xml.national), language=config.xml.national),
    "dr" : lambda dr, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dr), language=config.xml.regional), language=config.xml.regional),
    "df" : lambda df, lexical_entry: lexical_entry.set_definition(add_final(force_caps(df), language=config.xml.French), language=config.xml.French)
})

## Functions to process some MDF fields (output)
def get_ec(lexical_entry):
    """Return the etymology comment of *lexical_entry*, prefixed with a
    <lang="..."> tag when a term source language is recorded.
    """
    ec = lexical_entry.get_etymology_comment()
    if lexical_entry.get_term_source_language() is not None:
        ec = "<lang=\"" + lexical_entry.get_term_source_language() + "\">" + " " + ec
    return ec
# Handlers registered with the MDF->LMF mapping.  The "__xx" entries are
# attribute-aware (called with the marker attributes, the field value and the
# entry); the definition entries force the first character to upper case and,
# except for the vernacular one, terminate the text via add_final().
_generated_and_definition_handlers = {
    "__lx": lambda attributes, lx, lexical_entry: process_lx(attributes, lx, lexical_entry),
    "__se": lambda attributes, se, lexical_entry: lexical_entry.create_and_add_related_form(se, mdf_semanticRelation["se"]),
    "__nt": lambda attributes, nt, lexical_entry: process_nt(attributes, nt, lexical_entry),
    "__np": lambda attributes, np, lexical_entry: process_np(attributes, np, lexical_entry),
    "__ec": lambda attributes, ec, lexical_entry: process_ec(attributes, ec, lexical_entry),
    "__sd": lambda attributes, sd, lexical_entry: process_sd(attributes, sd, lexical_entry),
    "__cf": lambda attributes, cf, lexical_entry: process_cf(attributes, cf, lexical_entry),
    # Force first character of definitions to be in upper case
    "dv": lambda dv, lexical_entry: lexical_entry.set_definition(force_caps(dv), language=config.xml.vernacular),
    "de": lambda de, lexical_entry: lexical_entry.set_definition(add_final(force_caps(de), language=config.xml.English), language=config.xml.English),
    "dn": lambda dn, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dn), language=config.xml.national), language=config.xml.national),
    "dr": lambda dr, lexical_entry: lexical_entry.set_definition(add_final(force_caps(dr), language=config.xml.regional), language=config.xml.regional),
    "df": lambda df, lexical_entry: lexical_entry.set_definition(add_final(force_caps(df), language=config.xml.French), language=config.xml.French),
}
mdf_lmf.update(_generated_and_definition_handlers)
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from config.mdf import mdf_lmf, lmf_mdf

## To define languages and fonts
import config


def items(lexical_entry):
    """Return the key used to identify/sort *lexical_entry* (its lexeme).

    Replaces the former ``items = lambda ...`` assignment (PEP 8 E731);
    name and behaviour are unchanged.
    """
    return lexical_entry.get_lexeme()


## Functions to process some MDF fields (input)
# No language-specific input handlers yet; the empty update is kept as a
# placeholder so this module mirrors the structure of its sibling configs.
mdf_lmf.update({})

## Functions to process some MDF fields (output)
# No language-specific output handlers yet.
lmf_mdf.update({})

## Functions to process some LaTeX fields (output)

## Function giving order in which information must be written in LaTeX and mapping between LMF representation and LaTeX (output)