def test_doc_write(self):
    """! @brief Check that doc_write() produces a readable file, with the default mapping and with a custom one.
    The output file is removed afterwards, even if the test fails half-way.
    """
    import os
    import sys

    # Create LMF objects
    lexical_entry = LexicalEntry()
    lexical_entry.lemma = Lemma()
    lexical_entry.partOfSpeech = "toto"
    lexical_entry.status = "draft"
    lexical_entry.lemma.lexeme = "hello"
    lexicon = Lexicon()
    lexicon.add_lexical_entry(lexical_entry)
    lexical_resource = LexicalResource()
    lexical_resource.add_lexicon(lexicon)

    # os.path.join keeps the path relative when sys.path[0] is empty,
    # instead of pointing at the filesystem root
    doc_filename = os.path.join(sys.path[0], "output.docx")

    # Customize mapping
    def lmf2doc(lexicon, document, items, sort_order, paradigms, reverse):
        return "test"

    try:
        # Write document file with the default mapping and read it back
        doc_write(lexical_resource, doc_filename)
        # The document is not plain text: read it as bytes
        with open(doc_filename, "rb") as doc_file:
            doc_file.readlines()

        # Write document file with the customized mapping and read it back
        doc_write(lexical_resource, doc_filename, None, lmf2doc)
        with open(doc_filename, "rb") as doc_file:
            doc_file.readlines()

        # Release instantiated objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource
    finally:
        # Remove document file, whatever the outcome of the test
        if os.path.exists(doc_filename):
            os.remove(doc_filename)
def test_odt_write(self):
    """! @brief Check that odt_write() produces a readable file, with the default mapping and with a custom one.
    The output file is removed afterwards, even if the test fails half-way.
    """
    import os
    import sys

    # Create LMF objects
    lexical_entry = LexicalEntry()
    lexical_entry.lemma = Lemma()
    lexical_entry.partOfSpeech = "toto"
    lexical_entry.status = "draft"
    lexical_entry.lemma.lexeme = "hello"
    lexicon = Lexicon()
    lexicon.add_lexical_entry(lexical_entry)
    lexical_resource = LexicalResource()
    lexical_resource.add_lexicon(lexicon)

    # os.path.join keeps the path relative when sys.path[0] is empty,
    # instead of pointing at the filesystem root
    odt_filename = os.path.join(sys.path[0], "output.odt")

    # Customize mapping
    def lmf2odt(lexicon, document, items, sort_order, paradigms, reverse):
        return "test"

    try:
        # Write document file with the default mapping and read it back
        odt_write(lexical_resource, odt_filename)
        # The document is not plain text: read it as bytes
        with open(odt_filename, "rb") as odt_file:
            odt_file.readlines()

        # Write document file with the customized mapping and read it back
        odt_write(lexical_resource, odt_filename, None, lmf2odt)
        with open(odt_filename, "rb") as odt_file:
            odt_file.readlines()

        # Release instantiated objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource
    finally:
        # Remove document file, whatever the outcome of the test
        if os.path.exists(odt_filename):
            os.remove(odt_filename)
def test_tex_write(self):
    """! @brief Check the LaTeX file written by tex_write(), with the default mapping and with a custom one.
    The generated file is compared line by line with the expected content,
    and is removed afterwards, even if the test fails half-way.
    """
    import os
    import sys

    # Create LMF objects
    lexical_entry = LexicalEntry()
    lexical_entry.lemma = Lemma()
    lexical_entry.partOfSpeech = "toto"
    lexical_entry.status = "draft"
    lexical_entry.lemma.lexeme = "hello"
    lexicon = Lexicon()
    lexicon.add_lexical_entry(lexical_entry)
    lexical_resource = LexicalResource()
    lexical_resource.add_lexicon(lexicon)

    # os.path.join keeps the path relative when sys.path[0] is empty,
    # instead of pointing at the filesystem root
    tex_filename = os.path.join(sys.path[0], "output.tex")

    # Lines expected before and after the entries, whatever the mapping.
    # Every backslash is escaped explicitly: sequences such as "\e" or "\i"
    # are invalid escapes that only work by accident.
    begin_lines = [
        EOL,
        "\\begin{document}" + EOL,
        "\\maketitle" + EOL,
        "\\newpage" + EOL,
        EOL,
        "\\def\\mytextsc{\\bgroup\\obeyspaces\\mytextscaux}" + EOL,
        "\\def\\mytextscaux#1{\\mytextscauxii #1\\relax\\relax\\egroup}" + EOL,
        "\\def\\mytextscauxii#1{%" + EOL,
        "\\ifx\\relax#1\\else \\ifcat#1\\@sptoken{} \\expandafter\\expandafter\\expandafter\\mytextscauxii\\else" + EOL,
        "\\ifnum`#1=\\uccode`#1 {\\normalsize #1}\\else {\\footnotesize \\uppercase{#1}}\\fi \\expandafter\\expandafter\\expandafter\\mytextscauxii\\expandafter\\fi\\fi}" + EOL,
        EOL,
        "\\setlength\\parindent{0cm}" + EOL,
        EOL,
        "\\addmediapath{.}" + EOL,
        "\\addmediapath{./mp3}" + EOL,
        "\\addmediapath{./wav}" + EOL,
        "\\graphicspath{{" + os.path.abspath('.') + "/pylmflib/output/img/}}" + EOL,
        EOL,
        "\\newpage" + EOL,
        "\\begin{multicols}{2}" + EOL,
        EOL
    ]
    end_lines = [
        "\\end{multicols}" + EOL,
        "\\end{document}" + EOL
    ]

    # Customize mapping
    my_lmf_tex = {
        "Lemma.lexeme": lambda lexical_entry: "is " + lexical_entry.get_lexeme() + "." + EOL,
        "LexicalEntry.id": lambda lexical_entry: "The lexical entry " + str(lexical_entry.get_id()) + " ",
        "LexicalEntry.partOfSpeech": lambda lexical_entry: "Its grammatical category is " + lexical_entry.get_partOfSpeech() + "." + EOL,
        "LexicalEntry.status": lambda lexical_entry: "Warning: " + lexical_entry.get_status() + " version!" + EOL
    }
    my_order = ["LexicalEntry.id", "Lemma.lexeme", "LexicalEntry.partOfSpeech", "LexicalEntry.status"]

    def lmf2tex(entry, font):
        # Concatenate the mapped attributes in the requested order
        return "".join(my_lmf_tex[attribute](entry) for attribute in my_order)

    try:
        # Write LaTeX file with the default mapping and test result
        tex_write(lexical_resource, tex_filename)
        expected_lines = [
            "\\newpage" + EOL,
            "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
            # Bookmark line currently not expected in the output:
            # "\\pdfbookmark[1]{\\ipa{ H h }}{ H h }" + EOL,
            "\\paragraph{\\hspace{-0.5cm} \\textbf{\\ipa{hello}}} \\hypertarget{01}{}" + EOL,
            "\\markboth{\\textbf{\\ipa{hello}}}{}" + EOL,
            "\\textit{Status:} draft" + EOL,
            "\\lhead{\\firstmark}" + EOL,
            "\\rhead{\\botmark}" + EOL,
            EOL
        ]
        with open(tex_filename, "r") as tex_file:
            self.assertListEqual(begin_lines + expected_lines + end_lines, tex_file.readlines())

        # Write LaTeX file with the customized mapping and test result
        # NOTE(review): 'font' is not defined in this test; presumably a module-level name -- confirm
        tex_write(lexical_resource, tex_filename, None, None, lmf2tex, font)
        expected_lines = [
            "\\newpage" + EOL,
            "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
            # Bookmark line currently not expected in the output:
            # "\\pdfbookmark[1]{\\ipa{ H h }}{ H h }" + EOL,
            "The lexical entry 01 is hello." + EOL,
            "Its grammatical category is toto." + EOL,
            "Warning: draft version!" + EOL,
            "\\lhead{\\firstmark}" + EOL,
            "\\rhead{\\botmark}" + EOL,
            EOL
        ]
        with open(tex_filename, "r") as tex_file:
            self.assertListEqual(begin_lines + expected_lines + end_lines, tex_file.readlines())

        # Release instantiated objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource
    finally:
        # Remove LaTeX file, whatever the outcome of the test
        if os.path.exists(tex_filename):
            os.remove(tex_filename)
def test_tex_write(self):
    """! @brief Check the LaTeX file written by tex_write(), with the default mapping and with a custom one.
    The generated file is compared line by line with the expected content,
    and is removed afterwards, even if the test fails half-way.
    """
    import os
    import sys

    # Create LMF objects
    lexical_entry = LexicalEntry()
    lexical_entry.lemma = Lemma()
    lexical_entry.partOfSpeech = "toto"
    lexical_entry.status = "draft"
    lexical_entry.lemma.lexeme = "hello"
    lexicon = Lexicon()
    lexicon.add_lexical_entry(lexical_entry)
    lexical_resource = LexicalResource()
    lexical_resource.add_lexicon(lexicon)

    # os.path.join keeps the path relative when sys.path[0] is empty,
    # instead of pointing at the filesystem root
    tex_filename = os.path.join(sys.path[0], "output.tex")

    # Lines expected before and after the entries, whatever the mapping.
    # Every backslash is escaped explicitly: sequences such as "\e" or "\i"
    # are invalid escapes that only work by accident.
    begin_lines = [
        EOL,
        "\\begin{document}" + EOL,
        "\\maketitle" + EOL,
        "\\newpage" + EOL,
        EOL,
        "\\def\\mytextsc{\\bgroup\\obeyspaces\\mytextscaux}" + EOL,
        "\\def\\mytextscaux#1{\\mytextscauxii #1\\relax\\relax\\egroup}" + EOL,
        "\\def\\mytextscauxii#1{%" + EOL,
        "\\ifx\\relax#1\\else \\ifcat#1\\@sptoken{} \\expandafter\\expandafter\\expandafter\\mytextscauxii\\else" + EOL,
        "\\ifnum`#1=\\uccode`#1 {\\normalsize #1}\\else {\\footnotesize \\uppercase{#1}}\\fi \\expandafter\\expandafter\\expandafter\\mytextscauxii\\expandafter\\fi\\fi}" + EOL,
        EOL,
        "\\setlength\\parindent{0cm}" + EOL,
        EOL,
        "\\addmediapath{.}" + EOL,
        "\\addmediapath{./mp3}" + EOL,
        "\\addmediapath{./wav}" + EOL,
        "\\graphicspath{{" + os.path.abspath('.') + "/pylmflib/output/img/}}" + EOL,
        EOL,
        "\\newpage" + EOL,
        "\\begin{multicols}{2}" + EOL,
        EOL
    ]
    end_lines = [
        "\\end{multicols}" + EOL,
        "\\end{document}" + EOL
    ]

    # Customize mapping
    my_lmf_tex = {
        "Lemma.lexeme": lambda lexical_entry: "is " + lexical_entry.get_lexeme() + "." + EOL,
        "LexicalEntry.id": lambda lexical_entry: "The lexical entry " + str(lexical_entry.get_id()) + " ",
        "LexicalEntry.partOfSpeech": lambda lexical_entry: "Its grammatical category is " + lexical_entry.get_partOfSpeech() + "." + EOL,
        "LexicalEntry.status": lambda lexical_entry: "Warning: " + lexical_entry.get_status() + " version!" + EOL
    }
    my_order = ["LexicalEntry.id", "Lemma.lexeme", "LexicalEntry.partOfSpeech", "LexicalEntry.status"]

    def lmf2tex(entry, font):
        # Concatenate the mapped attributes in the requested order
        return "".join(my_lmf_tex[attribute](entry) for attribute in my_order)

    try:
        # Write LaTeX file with the default mapping and test result
        tex_write(lexical_resource, tex_filename)
        expected_lines = [
            "\\newpage" + EOL,
            "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
            # Bookmark line currently not expected in the output:
            # "\\pdfbookmark[1]{\\ipa{ H h }}{ H h }" + EOL,
            "\\paragraph{\\hspace{-0.5cm} \\textbf{\\ipa{hello}}} \\hypertarget{01}{}" + EOL,
            "\\markboth{\\textbf{\\ipa{hello}}}{}" + EOL,
            "\\textit{Status:} draft" + EOL,
            "\\lhead{\\firstmark}" + EOL,
            "\\rhead{\\botmark}" + EOL,
            EOL
        ]
        with open(tex_filename, "r") as tex_file:
            self.assertListEqual(begin_lines + expected_lines + end_lines, tex_file.readlines())

        # Write LaTeX file with the customized mapping and test result
        # NOTE(review): 'font' is not defined in this test; presumably a module-level name -- confirm
        tex_write(lexical_resource, tex_filename, None, None, lmf2tex, font)
        expected_lines = [
            "\\newpage" + EOL,
            "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
            # Bookmark line currently not expected in the output:
            # "\\pdfbookmark[1]{\\ipa{ H h }}{ H h }" + EOL,
            "The lexical entry 01 is hello." + EOL,
            "Its grammatical category is toto." + EOL,
            "Warning: draft version!" + EOL,
            "\\lhead{\\firstmark}" + EOL,
            "\\rhead{\\botmark}" + EOL,
            EOL
        ]
        with open(tex_filename, "r") as tex_file:
            self.assertListEqual(begin_lines + expected_lines + end_lines, tex_file.readlines())

        # Release instantiated objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource
    finally:
        # Remove LaTeX file, whatever the outcome of the test
        if os.path.exists(tex_filename):
            os.remove(tex_filename)
def _build_lambda(var, body):
    """! @brief Build a one-argument function from its textual definition.
    @param var Name of the lambda parameter, as given in the XML attribute 'var'.
    @param body Python expression forming the lambda body, as given in the XML element text.
    @return The function 'lambda var: body'.
    """
    # SECURITY NOTE: this evaluates Python code taken from the configuration
    # file, so only trusted configuration files must be read.
    # eval() returns the function directly; relying on exec() to bind a local
    # name does not work inside a function under Python 3.
    return eval("lambda " + var + ": " + body)


def config_read(filename):
    """! @brief Read an XML file giving the user configuration.
    @param filename The name of the XML file to read with full path, for instance 'pylmflib/pylmflib/config/default/config.xml'.
    @return A Lexical Resource, or None if the file contains no 'LMF' element.
    @exception InputError Raised if a top-level XML element is not one of 'Language', 'Font', 'LMF', 'MDF' or 'LaTeX'.
    """
    import os
    import config.xml

    # Value of XML attribute "att" -> name of the LexicalResource setter to call with "val"
    global_information_setters = {
        "languageCode": "set_language_code",
        "author": "set_author",
        "version": "set_version",
        "lastUpdate": "set_last_update",
        "license": "set_license",
        "characterEncoding": "set_character_encoding",
        "dateCoding": "set_date_coding",
        "creationDate": "set_creation_date",
        "projectName": "set_project_name",
        "description": "set_description"
    }
    # Value of XML attribute "att" -> name of the Lexicon setter to call with "val"
    lexicon_setters = {
        "language": "set_language",
        "languageScript": "set_languageScript",
        "label": "set_label",
        "lexiconType": "set_lexiconType",
        "entrySource": "set_entrySource",
        "localPath": "set_localPath"
    }

    # Stays None if the configuration has no "LMF" element
    lexical_resource = None
    configuration = parse_xml(filename)
    # Parse XML elements
    for section in configuration:
        if section.tag == "Language":
            # XML element "Language" has several XML subelements "lang"
            for lang in section:
                # XML elements "lang" have 2 XML attributes: one for the nature of the language ("att"), a second for the language code ("val")
                # setattr() stores the value as is, whatever quotes it contains
                setattr(config.xml, lang.attrib["att"], lang.attrib["val"])
        elif section.tag == "Font":
            config.xml.font = dict()
            # XML element "Font" has several XML subelements "font"
            for font in section:
                # XML elements "font" have 2 XML attributes: one for the nature of the language ("att"), a second for the variable name ("var")
                config.xml.font[font.attrib['att']] = _build_lambda(font.attrib['var'], font.text)
        elif section.tag == "LMF":
            # Create lexical resource and set DTD version
            root = section[0]
            lexical_resource = LexicalResource(root.attrib["dtdVersion"])
            for node in root:
                if node.tag == "GlobalInformation":
                    # Set global information; unknown features are ignored
                    for feat in node:
                        setter = global_information_setters.get(feat.attrib["att"])
                        if setter is not None:
                            getattr(lexical_resource, setter)(feat.attrib["val"])
                elif node.tag == "Lexicon":
                    # Create lexicon and set identifier
                    lexicon = Lexicon(node.attrib["id"])
                    # Set lexicon attributes; unknown features are ignored
                    for feat in node:
                        att = feat.attrib["att"]
                        setter = lexicon_setters.get(att)
                        if setter is None:
                            continue
                        getattr(lexicon, setter)(feat.attrib["val"])
                        if att == "localPath":
                            # Set absolute path to audio files
                            config.xml.audio_path = os.path.abspath(os.path.abspath('.') + "/" + feat.attrib["val"]) + "/"
                    # Attach lexicon to the lexical resource
                    lexical_resource.add_lexicon(lexicon)
        elif section.tag == "MDF":
            for mdf in section:
                if mdf.tag == "mdf_lmf":
                    # XML elements "mdf_lmf" have 2 XML attributes: one for the name of the marker ("marker"), a second for the variable name ("var")
                    mdf_lmf.update({mdf.attrib['marker']: _build_lambda(mdf.attrib['var'], mdf.text)})
                elif mdf.tag == "ps_partOfSpeech":
                    # XML elements "ps_partOfSpeech" have 2 XML attributes: one for the MDF value ("ps"), a second for the LMF value ("partOfSpeech")
                    ps_partOfSpeech.update({mdf.attrib['ps']: mdf.attrib['partOfSpeech']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(mdf.attrib['partOfSpeech'])
                    # And automatically update the reverse operation
                    partOfSpeech_tex.update({mdf.attrib['partOfSpeech']: mdf.attrib['ps']})
                elif mdf.tag == "pdl_paradigmLabel":
                    # XML elements "pdl_paradigmLabel" have 2 XML attributes: one for the MDF value ("pdl"), a second for the LMF value ("paradigmLabel")
                    pdl_paradigmLabel.update({mdf.attrib['pdl']: mdf.attrib['paradigmLabel']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(mdf.attrib['paradigmLabel'])
                    # And automatically update the reverse operation
                    paradigmLabel_tex.update({mdf.attrib['paradigmLabel']: mdf.attrib['pdl']})
                elif mdf.tag == "lmf_mdf":
                    # XML elements "lmf_mdf" have 2 XML attributes: one for the name of the marker ("marker"), a second for the variable name ("var")
                    lmf_mdf.update({mdf.attrib['marker']: _build_lambda(mdf.attrib['var'], mdf.text)})
                elif mdf.tag == "mdf_order":
                    # Build a nested list of tags: each element tag, followed by
                    # the list of its children tags (itself nested one level deeper)
                    # NOTE(review): 'mdf_order' is assigned here, so it is a local
                    # variable of this function and the result is not visible to
                    # the rest of the module -- confirm where it should be stored
                    mdf_order = []
                    for element in mdf:
                        mdf_order.append(element.tag)
                        list1 = []
                        for level1 in element:
                            list1.append(level1.tag)
                            list2 = [level2.tag for level2 in level1]
                            if list2:
                                list1.append(list2)
                        if list1:
                            mdf_order.append(list1)
        elif section.tag == "LaTeX":
            for param in section:
                if param.tag == "partOfSpeech_tex":
                    # XML elements "partOfSpeech_tex" have 2 or 3 XML attributes: one for the LMF value ("partOfSpeech"), a second for the LaTeX value ("tex"), and an optional one to define language
                    if 'lang' in param.attrib:
                        key = (param.attrib['lang'], param.attrib['partOfSpeech'])
                    else:
                        key = param.attrib['partOfSpeech']
                    partOfSpeech_tex.update({key: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(param.attrib['partOfSpeech'])
                elif param.tag == "paradigmLabel_tex":
                    # XML elements "paradigmLabel_tex" have 2 XML attributes: one for the LMF value ("paradigmLabel"), a second for the LaTeX value ("tex")
                    paradigmLabel_tex.update({param.attrib['paradigmLabel']: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(param.attrib['paradigmLabel'])
        else:
            raise InputError(module_name + ".py", "XML file '%s' is not well-formatted." % filename)
    return lexical_resource
class TestLexicalResourceFunctions(unittest.TestCase):
    """! @brief Unit tests of the LexicalResource accessors and lexicon management methods."""

    def setUp(self):
        # Instantiate a LexicalResource object
        self.lexical_resource = LexicalResource()

    def tearDown(self):
        # Release instantiated objects
        del self.lexical_resource

    def _check_setter(self, setter_name, attribute_name, value):
        # The setter must return the lexical resource itself and store the
        # value in the given attribute of the global information
        self.assertEqual(getattr(self.lexical_resource, setter_name)(value), self.lexical_resource)
        self.assertEqual(getattr(self.lexical_resource.global_information, attribute_name), value)

    def _check_getter(self, getter_name, attribute_name):
        # The getter must return the very object held by the given attribute
        # of the global information
        self.assertIs(getattr(self.lexical_resource, getter_name)(),
                      getattr(self.lexical_resource.global_information, attribute_name))

    def test_init(self):
        resource = self.lexical_resource
        self.assertEqual(resource.dtdVersion, 16)
        self.assertIsInstance(resource.global_information, GlobalInformation)
        self.assertListEqual(resource.lexicon, [])
        self.assertListEqual(resource.speaker, [])

    def test_get_lexicons(self):
        # Create lexicons
        first, second = Lexicon(), Lexicon()
        # Add lexicons to the lexical resource
        self.lexical_resource.lexicon = [first, second]
        # Test get lexicons
        self.assertListEqual(self.lexical_resource.get_lexicons(), [first, second])
        # Release Lexicon instances
        del self.lexical_resource.lexicon[:]
        del first, second

    def test_add_lexicon(self):
        resource = self.lexical_resource
        # Create lexicons
        first, second = Lexicon(), Lexicon()
        # Test add lexicons to the lexical resource
        self.assertEqual(resource.add_lexicon(first), resource)
        self.assertListEqual(resource.lexicon, [first])
        self.assertEqual(resource.add_lexicon(second), resource)
        self.assertListEqual(resource.lexicon, [first, second])
        # Release Lexicon instances
        del resource.lexicon[:]
        del first, second

    def test_remove_lexicon(self):
        resource = self.lexical_resource
        # Create lexicons
        first, second = Lexicon(), Lexicon()
        # Add lexicons to the lexical resource
        resource.lexicon = [first, second]
        # Test remove lexicons
        self.assertEqual(resource.remove_lexicon(first), resource)
        self.assertListEqual(resource.lexicon, [second])
        self.assertEqual(resource.remove_lexicon(second), resource)
        self.assertListEqual(resource.lexicon, [])
        # Release Lexicon instances
        del first, second

    def test_get_lexicon(self):
        resource = self.lexical_resource
        # Create lexicons
        first = Lexicon("lexicon1")
        second = Lexicon("lexicon2")
        # Add lexicons to the lexical resource
        resource.lexicon = [first, second]
        # Test get lexicon
        self.assertIsNone(resource.get_lexicon("unknown identifier"))
        self.assertEqual(resource.get_lexicon("lexicon2"), second)
        # Release Lexicon instances
        del first, second

    def test_set_dtdVersion(self):
        resource = self.lexical_resource
        dtd_version = "0"
        self.assertEqual(resource.set_dtdVersion(dtd_version), resource)
        self.assertEqual(resource.dtdVersion, dtd_version)

    def test_get_dtdVersion(self):
        resource = self.lexical_resource
        self.assertIs(resource.get_dtdVersion(), resource.dtdVersion)

    def test_set_language_code(self):
        self._check_setter("set_language_code", "languageCode", "iso")

    def test_get_language_code(self):
        self._check_getter("get_language_code", "languageCode")

    def test_set_version(self):
        self._check_setter("set_version", "version", "0")

    def test_get_version(self):
        self._check_getter("get_version", "version")

    def test_set_license(self):
        self._check_setter("set_license", "license", "free")

    def test_get_license(self):
        self._check_getter("get_license", "license")

    def test_set_character_encoding(self):
        self._check_setter("set_character_encoding", "characterEncoding", "iso")

    def test_get_character_encoding(self):
        self._check_getter("get_character_encoding", "characterEncoding")

    def test_set_date_coding(self):
        self._check_setter("set_date_coding", "dateCoding", "iso")

    def test_get_date_coding(self):
        self._check_getter("get_date_coding", "dateCoding")

    def test_set_project_name(self):
        self._check_setter("set_project_name", "projectName", "project")

    def test_get_project_name(self):
        self._check_getter("get_project_name", "projectName")

    def test_set_creation_date(self):
        self._check_setter("set_creation_date", "creationDate", "2014-10-08")

    def test_get_creation_date(self):
        self._check_getter("get_creation_date", "creationDate")

    def test_set_last_update(self):
        self._check_setter("set_last_update", "lastUpdate", "2014-10-10")

    def test_get_last_update(self):
        self._check_getter("get_last_update", "lastUpdate")

    def test_set_author(self):
        self._check_setter("set_author", "author", "My Name")

    def test_get_author(self):
        self._check_getter("get_author", "author")

    def test_set_description(self):
        self._check_setter("set_description", "description",
                           "This is a short description of this lexical resource.")

    def test_get_description(self):
        self._check_getter("get_description", "description")

    def test_get_bibliographic_citation(self):
        self._check_getter("get_bibliographic_citation", "bibliographicCitation")