def lexicalize_language(language, lexicalized_title, realize="random"):
    r"""
    Lexicalize the statement that a book is written in a given language,
    either with a noun ("das Buch ist auf Deutsch") or with an adjective
    ("das Buch ist in deutscher Sprache").

    NOTE: negation isn't possible w/ the current grammar ("nicht auf Deutsch")

    NOTE: C{lexicalized_title} is modified in place (its mode is changed
    to "SUBJ").

    @type language: C{tuple} of (C{str}, C{str})
    @param language: ("English", "neutral") or ("German", "neutral").
    Only "German" and "English" have an entry in the lookup table; the
    second element (the rating) is unpacked but not used.

    @type lexicalized_title: C{Diamond}
    @param lexicalized_title: the title that becomes the subject

    @type realize: C{str}
    @param realize: "noun", "adjective" or "random" (one of the two is
    picked with C{random.choice})

    @return: the result of C{create_diamond} for the copula "sein-kop"

    realize "das Buch ist in englischer Sprache":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_language(("English", ""), title, realize="adjective"))
    ['das Buch in englischer Sprache ist', 'das Buch ist in englischer Sprache', 'ist das Buch in englischer Sprache']

    realize "das Buch ist auf Deutsch":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_language(("German", ""), title, realize="noun"))
    ['das Buch auf Deutsch ist', 'das Buch ist auf Deutsch', 'ist das Buch auf Deutsch']
    """
    assert realize in ("noun", "adjective", "random")
    if realize == "random":
        realize = random.choice(["noun", "adjective"])

    language_name, _rating = language
    german_names = {"German": "Deutsch", "English": "Englisch"}

    tempus = gen_tempus("präs")
    lexicalized_title.change_mode("SUBJ")
    singular = gen_num("sing")

    if realize == "noun":
        # "auf Deutsch" / "auf Englisch"
        preposition = gen_prep("auf", "zusammenhang")
        prkompl = create_diamond("PRKOMPL", "abstraktum",
                                 german_names[language_name],
                                 [singular, preposition])
    elif realize == "adjective":
        # "in deutscher Sprache" / "in englischer Sprache"
        preposition = gen_prep("in", "zusammenhang")
        adjective = german_names[language_name].lower()
        adjective_mod = gen_mod(adjective, "eigenschaft")
        adjective_mod.append_subdiamond(gen_komp("pos"))
        prkompl = create_diamond("PRKOMPL", "sorte", "Sprache",
                                 [singular, preposition, adjective_mod])

    return create_diamond("", u"prädikation", "sein-kop",
                          [tempus, lexicalized_title, prkompl])
def lexicalize_lastbook_match(lastbook_match_message_block):
    r"""
    lexicalize all the messages contained in a lastbook_match message block
    (aka ``Message``)

    possible: sowohl X als auch Y / beide Bücher
    implemented: beide Bücher

    Every string-keyed message except "lastbook_authors", "lastbook_title"
    and "pagerange" is passed to the module-level function named
    ``lexicalize_<message name>``, together with a diamond for
    "beide Bücher" as its ``lexicalized_title``.

    TODO: implement lexicalize_pagerange

    :type lastbook_match_message_block: ``Message``
    :param lastbook_match_message_block: a message (of type "lastbook_match")

    :rtype: ``List`` of ``Diamond``s
    :return: a list of lexicalized phrases, which can be realized with
        ``tccg`` directly or turned into sentences beforehand with
        ``lexicalization.phrase2sentence`` to remove ambiguity

    :raise NameError: if no ``lexicalize_<message name>`` function exists
        for one of the messages
    """
    assert lastbook_match_message_block[Feature("msgType")] == "lastbook_match"

    # work on a copy, so the caller's message block is never touched
    msg_block = deepcopy(lastbook_match_message_block)

    num = gen_num("plur")
    art = gen_art("quantbeide")
    agens = create_diamond("AGENS", "artefaktum", "Buch", [num, art])

    skipped_messages = ("lastbook_authors", "lastbook_title", "pagerange")
    module_namespace = globals()

    lxed_phrses = []
    for msg_name, msg in msg_block.items():
        if not isinstance(msg_name, str) or msg_name in skipped_messages:
            continue

        # Plain namespace lookup instead of eval(): the name is built from
        # message keys and must never be executed as an expression.
        lexicalize_function_name = "lexicalize_" + msg_name
        try:
            lexicalize_function = module_namespace[lexicalize_function_name]
        except KeyError:
            # same exception type that eval() raised for an unknown name
            raise NameError(
                "name '%s' is not defined" % lexicalize_function_name)

        # The lexicalize_* functions change the mode of the title they are
        # given (e.g. "SUBJ" vs. "AGENS"). Each call gets its own copy, so a
        # later call cannot alter a phrase that was produced earlier.
        # NOTE(review): presumably create_diamond keeps a reference to its
        # subdiamonds -- if it copies them, this deepcopy is merely redundant.
        lxed_phrses.append(
            lexicalize_function(msg, lexicalized_title=deepcopy(agens)))
    return lxed_phrses
def phrase2sentence(diamond):
    """
    Wrap the lexicalization of a phrase (e.g. "das Buch ist neu") into the
    lexicalization of a declarative sentence, e.g. "das Buch ist neu ."

    The initial letter of a sentence will not be written in uppercase, as
    the grammar cannot cope with implicit upper/lowercase distinctions.

    NOTE: C{diamond} is modified in place (its mode is changed to "DEKL").

    @type diamond: C{Diamond}
    @param diamond: the lexicalized phrase

    @return: the result of C{create_diamond} with the phrase as its only
    subdiamond
    """
    assert isinstance(diamond, Diamond)
    diamond.change_mode("DEKL")
    sentence_content = [diamond]
    return create_diamond("", "deklarativ", "punkt", sentence_content)
def lexicalize_proglang(proglang, lexicalized_title=None,
                        lexicalized_authors=None, realize="embedded"):
    r"""
    Lexicalize the programming language(s) used in a book, either as a bare
    noun phrase or as a complete "... verwendet ..." statement.

    NOTE: with realize="complete", the title (or, if no title is given, the
    authors) diamond is modified in place (its mode is changed to "AGENS").

    @type proglang: C{tuple} of (C{frozenset}, C{str})
    @param proglang: a tuple consisting of a set of programming languages
    (as strings) and a rating (string)

    @type lexicalized_title: C{Diamond} or C{NoneType}
    @type lexicalized_authors: C{Diamond} or C{NoneType}

    @type realize: C{str}
    @param realize: "embedded" or "complete".
    if "embedded", the function will just generate a noun phrase, e.g.
    "die Programmiersprache Perl". if "complete", it will generate a
    sentence, e.g. "das Buch verwendet die Programmiersprache(n) X (und Y)"
    or "der Autor/ die Autoren verwenden die Programmiersprache(n) X
    (und Y)". Any other value makes the function return C{None}, provided
    a title or authors were given.

    realize "keine Programmiersprache":

    >>> openccg.realize(lexicalize_proglang((frozenset([]), ""), realize="embedded"))
    ['keine Programmiersprache', 'keiner Programmiersprache']

    realize "die Programmiersprachen A, B und C":

    >>> openccg.realize(lexicalize_proglang((frozenset(["Python" ,"Lisp", "C++"]), ""), realize="embedded"))
    ['den Programmiersprachen Python , Lisp und C++', 'der Programmiersprachen Python , Lisp und C++', 'die Programmiersprachen Python , Lisp und C++']

    realize two authors who use several programming languages:

    >>> authors = lexicalize_authors((["Horst Lohnstein", "Ralf Klabunde"], ""), realize="lastnames")
    >>> openccg.realize(lexicalize_proglang((frozenset(["Python" ,"Lisp", "C++"]), ""), lexicalized_authors=authors, realize="complete"))
    ['Lohnstein und Klabunde die Programmiersprachen Python , Lisp und C++ verwenden', 'Lohnstein und Klabunde verwenden die Programmiersprachen Python , Lisp und C++', 'verwenden Lohnstein und Klabunde die Programmiersprachen Python , Lisp und C++']

    realize a book title with several programming languages:

    >>> title = lexicalize_title(("Natural Language Processing", ""), realize="complete")
    >>> openccg.realize(lexicalize_proglang((frozenset(["Python" ,"Lisp", "C++"]), ""), lexicalized_title=title, realize="complete"))
    ['verwendet \xe2\x80\x9e Natural_Language_Processing \xe2\x80\x9c die Programmiersprachen Python , Lisp und C++', '\xe2\x80\x9e Natural_Language_Processing \xe2\x80\x9c die Programmiersprachen Python , Lisp und C++ verwendet', '\xe2\x80\x9e Natural_Language_Processing \xe2\x80\x9c verwendet die Programmiersprachen Python , Lisp und C++']
    """
    assert lexicalized_title or lexicalized_authors or realize == "embedded", \
        "requires either a lexicalized title, a lexicalized set of authors or"\
        " realize parameter == 'embedded'"

    if realize == "embedded":
        # just realize a noun phrase, e.g. "die Prog.sprachen A und B"
        return gen_proglang(proglang, mode="N")
    if realize != "complete":
        # unknown realization: nothing is generated
        return None

    temp = gen_tempus("präs")
    # the title takes precedence over the authors as the acting entity
    if lexicalized_title:
        agens = lexicalized_title
    elif lexicalized_authors:
        agens = lexicalized_authors
    agens.change_mode("AGENS")

    patiens = gen_proglang(proglang, mode="PATIENS")
    return create_diamond("", "handlung", "verwenden", [temp, agens, patiens])
def gen_pages_extra(length_description, lexicalized_title):
    """
    Lexicalize a remark about an unusual book length:

    - "very short": das Buch ist etwas kurz
    - "very long": das Buch ist sehr umfangreich

    NOTE: C{lexicalized_title} is modified in place (its mode is changed
    to "SUBJ").

    @type length_description: C{str}
    @param length_description: "very long", "very short"

    @type lexicalized_title: C{Diamond}
    @param lexicalized_title: the title that becomes the subject

    @return: the result of C{create_diamond} for the copula "sein-kop"

    @raise ValueError: if C{length_description} is neither "very long" nor
    "very short"
    """
    # Choose the adjective and its intensifier first: an unsupported
    # description must fail with a clear error *before* the title is
    # modified (it used to surface as an UnboundLocalError afterwards).
    if length_description == "very long":
        adjective = "umfangreich"
        spez_lexeme, spez_type = "sehr", "intensivierung"
    elif length_description == "very short":
        adjective = "kurz"
        spez_lexeme, spez_type = "etwas", u"abschwächung"
    else:
        raise ValueError(
            "length_description must be 'very long' or 'very short', "
            "got %r" % (length_description,))

    tempus = gen_tempus("präs")
    subj = lexicalized_title
    subj.change_mode("SUBJ")

    prkompl = create_diamond("PRKOMPL", "eigenschaft", adjective,
                             [gen_komp("pos"),
                              gen_spez(spez_lexeme, spez_type)])
    return create_diamond("", u"prädikation", "sein-kop",
                          [tempus, subj, prkompl])
def gen_length_lastbook_nomatch(length, lexicalized_title,
                                lexicalized_lastbooktitle):
    """
    Lexicalize a length comparison between two books as a copula phrase
    built from the page difference, the comparative "kürzer" / "länger"
    and the other book's title.

    NOTE: both title diamonds are modified in place (their modes are
    changed to "SUBJ" and "KOMPAR", respectively).

    @type length: C{Diamond}
    @param length: a feature structure that compares the length of two
    books::

        [ direction = '+'                    ]
        [                                    ]
        [ magnitude = [ number = 122     ]   ]
        [             [ unit   = 'pages' ]   ]
        [                                    ]
        [ rating    = 'neutral'              ]
        [ type      = 'RelativeVariation'    ]

    Only C{direction} and C{magnitude/number} are read here. A direction
    of '-' yields "kürzer", every other value yields "länger".

    @type lexicalized_title: C{Diamond}
    @param lexicalized_title: the title that becomes the subject

    @type lexicalized_lastbooktitle: C{Diamond}
    @param lexicalized_lastbooktitle: the title of the book that is
    compared against

    @return: the result of C{create_diamond} for the copula "sein-kop"
    """
    comp_lex = "kürzer" if length["direction"] == "-" else "länger"

    tempus = gen_tempus("präs")
    lexicalized_title.change_mode("SUBJ")

    # the page difference is handed to gen_mod as a string
    page_count = gen_mod(str(length["magnitude"]["number"]), "kardinal")
    comparative = create_diamond("MOD", "eigenschaft", comp_lex,
                                 [gen_komp("komp")])
    pages = create_diamond("MOD", "artefaktum", "Seite",
                           [gen_num("plur"), page_count, comparative])

    lexicalized_lastbooktitle.change_mode("KOMPAR")
    prkompl = create_diamond("PRKOMPL", "adjunktion", "adjunktor",
                             [pages, lexicalized_lastbooktitle])
    return create_diamond("", u"prädikation", "sein-kop",
                          [tempus, lexicalized_title, prkompl])
def lexicalize_exercises(exercises, lexicalized_title, lexeme="random"):
    r"""
    Lexicalize whether a book contains exercises:
    "das Buch enthält/beinhaltet (keine) Übungen".

    NOTE: C{lexicalized_title} is modified in place (its mode is changed
    to "AGENS").

    @type exercises: C{tuple} of (C{int}, C{str})
    @param exercises: a tuple stating if a book contains exercises
    (1, "neutral") or not (0, "neutral"). Any value other than 1 is
    realized as "keine Übungen"; the rating is not used.

    @type lexicalized_title: C{Diamond}
    @param lexicalized_title: C{Diamond} describing a book title

    @type lexeme: C{str}
    @param lexeme: "beinhalten", "enthalten" or "random" (one of the two
    is picked with C{random.choice})

    @return: the result of C{create_diamond} for the chosen verb

    realize "das Buch enthält Übungen":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_exercises((1, ""), title, lexeme="enthalten"))
    ['das Buch enth\xc3\xa4lt \xc3\x9cbungen', 'das Buch \xc3\x9cbungen enth\xc3\xa4lt', 'enth\xc3\xa4lt das Buch \xc3\x9cbungen']

    realize "das Buch beinhaltet keine Übungen":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_exercises((0, ""), title, lexeme="beinhalten"))
    ['beinhaltet das Buch keine \xc3\x9cbungen', 'das Buch beinhaltet keine \xc3\x9cbungen', 'das Buch keine \xc3\x9cbungen beinhaltet']
    """
    assert lexeme in ("beinhalten", "enthalten", "random")
    if lexeme == "random":
        lexeme = random.choice(["beinhalten", "enthalten"])

    has_exercises, _rating = exercises

    tempus = gen_tempus("präs")
    lexicalized_title.change_mode("AGENS")

    if has_exercises == 1:
        patiens_parts = [gen_num("plur")]
    else:
        # negate with the quantifier "keine"
        negation = gen_art("quantkein")
        patiens_parts = [gen_num("plur"), negation]
    patiens = create_diamond("PATIENS", "abstraktum", u"Übung", patiens_parts)

    return create_diamond("", "durativ", lexeme,
                          [tempus, lexicalized_title, patiens])
def gen_pages_id(pages_int, lexicalized_title, lexeme="random"):
    """
    Lexicalize the number of pages of a book. Depending on C{lexeme}, one
    of three constructions is built:

    - "umfang": verb "haben" with "Umfang" plus an attribute
      "von <n> Seite(n)"
    - "umfassen": verb "umfassen" with "<n> Seite(n)" as its patient
    - "länge": copula "sein-kop" with "<n> Seite(n) lang"

    NOTE: C{lexicalized_title} is modified in place (its mode is changed
    to "AGENS", or to "SUBJ" for the lexeme "länge").

    @type pages_int: C{int}
    @param pages_int: number of pages of a book. It is handed to
    C{gen_mod} unchanged.

    @type lexicalized_title: C{Diamond}
    @param lexicalized_title: the title of the book

    @type lexeme: C{str}
    @param lexeme: "umfang", "umfassen", "länge" or "random" (one of the
    three is picked with C{random.choice})

    @return: the result of C{create_diamond} for the chosen construction
    """
    # Validate up front, as the lexicalize_* functions with a lexeme
    # parameter do: an unknown lexeme used to fall through all branches
    # and return None after the title had already been modified.
    assert lexeme in ("umfang", "umfassen", "länge", "random"), \
        "unsupported lexeme: %r" % (lexeme,)

    tempus = gen_tempus("präs")
    title = lexicalized_title
    title.change_mode("AGENS")

    pages_num = gen_num("plur")
    pages_mod = gen_mod(pages_int, "kardinal")
    pages_nom = "artefaktum"
    pages_prop = "Seite"

    if lexeme == "random":
        lexeme = random.choice(["umfang", "umfassen", "länge"])

    if lexeme == "umfang":
        preposition = gen_prep("von", u"zugehörigkeit")
        attrib = create_diamond("ATTRIB", pages_nom, pages_prop,
                                [pages_num, preposition, pages_mod])
        patiens_num = gen_num("sing")
        art = gen_art("indef")
        patiens = create_diamond("PATIENS", "abstraktum", "Umfang",
                                 [patiens_num, art, attrib])
        return create_diamond("", "durativ", "haben",
                              [tempus, title, patiens])

    elif lexeme == "umfassen":
        patiens = create_diamond("PATIENS", pages_nom, pages_prop,
                                 [pages_num, pages_mod])
        return create_diamond("", "handlung", "umfassen",
                              [tempus, title, patiens])

    elif lexeme == "länge":
        # the copula construction needs a subject instead of an agent
        title.change_mode("SUBJ")
        komp_mod = create_diamond("MOD", "eigenschaft", "lang",
                                  [gen_komp("pos")])
        prkompl = create_diamond("PRKOMPL", pages_nom, pages_prop,
                                 [pages_num, pages_mod, komp_mod])
        return create_diamond("", u"prädikation", "sein-kop",
                              [tempus, title, prkompl])
def lexicalize_codeexamples(examples, lexicalized_title,
                            lexicalized_proglang=None, lexeme="random"):
    r"""
    Lexicalize whether a book contains code examples, optionally naming
    the programming language(s) they are written in:

    - das Buch enthält (keine) Code-Beispiele (in der Programmiersprache X).
    - das Buch beinhaltet (keine) Code-Beispiele.
    - das Buch enthält Code-Beispiele in den Programmiersprachen A und B.
    - „On Syntax“ beinhaltet keine Code-Beispiele.

    NOTE: C{lexicalized_title} is modified in place (mode "AGENS"). If the
    book has examples and C{lexicalized_proglang} is given, that diamond
    is modified in place as well (a preposition is inserted and its mode
    is changed to "ATTRIB").

    @type examples: C{tuple} of (C{int}, C{str})
    @param examples: a tuple, e.g. (0, 'neutral'), describing if a book
    uses code examples (1) or not (0)

    @type lexicalized_title: C{Diamond}

    @type lexicalized_proglang: C{Diamond} or C{NoneType}
    @param lexicalized_proglang: ignored if the book has no code examples

    @type lexeme: C{str}
    @param lexeme: "beinhalten", "enthalten" or "random".

    @return: the result of C{create_diamond} for the chosen verb

    realize "das Buch enthält Code-Beispiele":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_codeexamples((1, ""), lexicalized_title=title, lexeme="enthalten"))
    ['das Buch Code-Beispiele enth\xc3\xa4lt', 'das Buch enth\xc3\xa4lt Code-Beispiele', 'enth\xc3\xa4lt das Buch Code-Beispiele']

    realize "das Buch enthält keine Code-Beispiele":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_codeexamples((0, ""), lexicalized_title=title, lexeme="enthalten"))
    ['das Buch enth\xc3\xa4lt keine Code-Beispiele', 'das Buch keine Code-Beispiele enth\xc3\xa4lt', 'enth\xc3\xa4lt das Buch keine Code-Beispiele']

    realize "das Buch enthält Code-Beispiele in den Programmiersprachen A + B":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> plang = lexicalize_proglang((["Ada","Scheme"], ""), realize="embedded")
    >>> openccg.realize(lexicalize_codeexamples((1, ""), lexicalized_title=title, lexicalized_proglang=plang, lexeme="enthalten"))
    ['das Buch Code-Beispiele in den Programmiersprachen Ada und Scheme enth\xc3\xa4lt', 'das Buch enth\xc3\xa4lt Code-Beispiele in den Programmiersprachen Ada und Scheme', 'enth\xc3\xa4lt das Buch Code-Beispiele in den Programmiersprachen Ada und Scheme']

    realize "d. Buch von X + Y beinhaltet Code-Bsp. in den Prog.sprachen A + B"

    >>> authors = lexicalize_authors((["Alan Kay", "John Hopcroft"], ""), realize="lastnames")
    >>> title = lexicalize_title(("foo", ""), lexicalized_authors=authors, realize="abstract", authors_realize="preposition")
    >>> plang = lexicalize_proglang((["Ada","Scheme"], ""), realize="embedded")
    >>> openccg.realize(lexicalize_codeexamples((1, ""), lexicalized_title=title, lexicalized_proglang=plang, lexeme="beinhalten"))
    ['beinhaltet das Buch von Kay und Hopcroft Code-Beispiele in den Programmiersprachen Ada und Scheme', 'das Buch von Kay und Hopcroft Code-Beispiele in den Programmiersprachen Ada und Scheme beinhaltet', 'das Buch von Kay und Hopcroft beinhaltet Code-Beispiele in den Programmiersprachen Ada und Scheme']
    """
    assert lexeme in ("beinhalten", "enthalten", "random")
    if lexeme == "random":
        lexeme = random.choice(("beinhalten", "enthalten"))

    has_examples, _rating = examples

    temp = gen_tempus("präs")
    lexicalized_title.change_mode("AGENS")

    attribute = None
    if has_examples == 0:
        # contains no examples: "keine Code-Beispiele"
        attribute = gen_art("quantkein")
    elif lexicalized_proglang:
        # contains examples in programming language X: "in ... X"
        attribute = lexicalized_proglang
        attribute.insert_subdiamond(1, gen_prep("in", "zusammenhang"))
        attribute.change_mode("ATTRIB")

    # without an attribute, the book contains examples but the programming
    # language is unspecified
    patiens_parts = [gen_num("plur")]
    if attribute:
        patiens_parts.append(attribute)
    patiens = create_diamond("PATIENS", "abstraktum", "Code-Beispiel",
                             patiens_parts)

    return create_diamond("", "durativ", lexeme,
                          [temp, lexicalized_title, patiens])
def lexicalize_target(target, lexicalized_title):
    r"""
    Lexicalize the audience a book is aimed at. "das Buch richtet sich ..."

    - an Anfänger
    - an Einsteiger mit Grundkenntnissen
    - an Fortgeschrittene
    - an Experten

    NOTE: we could add these to the grammar:

    - das Buch setzt keine Kenntnisse voraus
    - das Buch richtet sich an ein fortgeschrittenes Publikum

    NOTE: C{lexicalized_title} is modified in place (its mode is changed
    to "AGENS").

    @type target: C{tuple} of (C{int}, C{str})
    @param target: a tuple, e.g. (0, "neutral"), states that the book is
    targeted towards beginners. Supported values are 0 to 3; the rating is
    not used.

    @type lexicalized_title: C{Diamond}

    @return: the result of C{create_diamond} for the verb "s.richten_an"

    realize "... richtet sich an Anfänger":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_target((0, ""), title))
    ['das Buch richtet sich an Anf\xc3\xa4nger', 'richtet sich das Buch an Anf\xc3\xa4nger', 'sich das Buch an Anf\xc3\xa4nger richtet']

    realize "... richtet sich an Einsteiger mit Grundkenntnissen":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_target((1, ""), title))
    ['das Buch richtet sich an Einsteiger mit Grundkenntnissen', 'richtet sich das Buch an Einsteiger mit Grundkenntnissen', 'sich das Buch an Einsteiger mit Grundkenntnissen richtet']

    realize "... richtet sich an Fortgeschrittene":

    >>> title = lexicalize_title(("foo", ""), realize="abstract")
    >>> openccg.realize(lexicalize_target((2, ""), title))
    ['das Buch richtet sich an Fortgeschrittene', 'richtet sich das Buch an Fortgeschrittene', 'sich das Buch an Fortgeschrittene richtet']

    realize "... richtet sich an Experten":

    >>> target = lexicalize_target((3, ""), title)
    >>> openccg.realize(target)
    ['das Buch richtet sich an Experten', 'richtet sich das Buch an Experten', 'sich das Buch an Experten richtet']
    """
    audience_level, _rating = target
    audience_lexemes = {0: "Anfänger", 1: "Einsteiger",
                        2: "Fortgeschritten", 3: "Experte"}

    tempus = gen_tempus("präs")
    lexicalized_title.change_mode("AGENS")
    reflexive_pronoun = gen_pronoun(3, "reflpro", "neut", "sing", mode="PRO")

    plural = gen_num("plur")
    towards = gen_prep("an", "gerichtetebez")
    patiens = create_diamond("PATIENS", "experte",
                             audience_lexemes[audience_level],
                             [plural, towards])

    if audience_level == 1:
        # add "mit Grundkenntnissen" to "Einsteiger"
        basics = create_diamond(
            "ATTRIB", "abstraktum", "Grundkenntnis",
            [gen_num("plur"), gen_prep("mit", u"zugehörigkeit")])
        patiens.append_subdiamond(basics)

    return create_diamond("", "handlung", "s.richten_an",
                          [tempus, lexicalized_title, reflexive_pronoun,
                           patiens])
def lexicalize_keywords(keywords_tuple, lexicalized_title=None,
                        lexicalized_authors = None, realize="complete",
                        lexeme="random"):
    r"""
    Lexicalize the topics (keywords) that a book or its author(s) deal
    with, using one of the verbs I{behandeln}, I{beschreiben}, I{eingehen}
    (auf) or I{aufgreifen}.

    NOTE: the title (or, if no title is given, the authors) diamond is
    modified in place (its mode is changed to "AGENS").

    @type keywords_tuple: C{tuple} of (C{frozenset} of C{str}, C{str})
    @param keywords_tuple: e.g. (frozenset(['generation', 'discourse',
    'semantics', 'parsing']), 'neutral')

    @type lexicalized_title: C{Diamond} or C{NoneType}
    @type lexicalized_authors: C{Diamond} or C{NoneType}

    @type realize: C{str}
    @param realize: "abstract", "complete".
    "abstract" realizes 'das Thema' / 'die Themen'.
    "complete" realizes an enumeration of those keywords.

    @type lexeme: C{str}
    @param lexeme: "behandeln", "beschreiben", "eingehen", "aufgreifen" or
    "random" (one of the four is picked with C{random.choice})

    @return: the result of C{create_diamond} for the chosen verb

    realize one keyword abstractly, using an abstract author and the lexeme
    I{behandeln}:

    >>> author = lexicalize_authors((["author1"], ""), realize="abstract")
    >>> openccg.realize(lexicalize_keywords((frozenset(["keyword1"]), ""), lexicalized_authors=author, realize="abstract", lexeme="behandeln"))
    ['behandelt der Autor das Thema', 'der Autor behandelt das Thema', 'der Autor das Thema behandelt']

    realize two keywords concretely, using two concrete authors and the
    lexeme I{beschreiben}:

    >>> authors = lexicalize_authors((["John E. Hopcroft","Jeffrey D. Ullman"], ""), realize="complete")
    >>> openccg.realize(lexicalize_keywords((frozenset(["parsing", "formal languages"]), ""), lexicalized_authors=authors, realize="complete", lexeme="beschreiben"))
    ['John E. Hopcroft und Jeffrey D. Ullman beschreiben die Themen formal_languages und parsing', 'John E. Hopcroft und Jeffrey D. Ullman die Themen formal_languages und parsing beschreiben', 'beschreiben John E. Hopcroft und Jeffrey D. Ullman die Themen formal_languages und parsing']

    realize 4 keywords, using 1 author's last name and the lexeme
    I{eingehen}:

    >>> author = lexicalize_authors((["Ralph Grishman"], ""), realize="lastnames")
    >>> openccg.realize( lexicalize_keywords((frozenset(["parsing","semantics","discourse","generation"]), ""), lexicalized_authors=author, realize="complete", lexeme="eingehen"))
    ['Grishman geht auf den Themen discourse , generation , parsing und semantics ein', 'Grishman geht auf die Themen discourse , generation , parsing und semantics ein', 'geht Grishman auf den Themen discourse , generation , parsing und semantics ein', 'geht Grishman auf die Themen discourse , generation , parsing und semantics ein']

    TODO: "___ geht auf den Themen ein" is not OK

    realize 1 keyword, using an abstract book title and the lexeme
    I{aufgreifen}:

    >>> title = lexicalize_title(("book1", ""), realize="abstract")
    >>> openccg.realize(lexicalize_keywords((frozenset(["regular expressions"]), ""), lexicalized_title=title, realize="complete", lexeme="aufgreifen"))
    ['das Buch greift das Thema regular_expressions auf', 'greift das Buch das Thema regular_expressions auf']

    realize 2 keywords, using a concrete book title and the lexeme
    I{beschreiben}:

    >>> title = lexicalize_title(("Grundlagen der Computerlinguistik", ""), realize="complete")
    >>> openccg.realize(lexicalize_keywords((frozenset(["grammar", "corpora"]), ""), lexicalized_title=title, realize="complete", lexeme="beschreiben"))
    ['beschreibt \xe2\x80\x9e Grundlagen_der_Computerlinguistik \xe2\x80\x9c die Themen corpora und grammar', '\xe2\x80\x9e Grundlagen_der_Computerlinguistik \xe2\x80\x9c beschreibt die Themen corpora und grammar', '\xe2\x80\x9e Grundlagen_der_Computerlinguistik \xe2\x80\x9c die Themen corpora und grammar beschreibt']
    """
    keywords, _rating = keywords_tuple
    assert realize in ("abstract", "complete"), \
        "choose 1 of these keyword realizations: abstract, complete"
    keyword_count = len(keywords)

    assert lexeme in ("behandeln", "beschreiben", "eingehen", "aufgreifen",
                      "random")
    if lexeme == "random":
        lexeme = random.choice(["behandeln", "beschreiben", "eingehen",
                                "aufgreifen"])

    # the topics: either 'das Thema' / 'die Themen' or the enumeration
    if realize == "abstract":
        patiens = gen_abstract_keywords(keyword_count)
    elif realize == "complete":
        patiens = gen_keywords(keywords, mode="N")
    patiens.change_mode("PATIENS")

    assert lexicalized_authors or lexicalized_title, \
        "keywords need a lexicalized title or author(s) to be realized"
    # the title takes precedence over the authors as the acting entity
    if lexicalized_title:
        agens = lexicalized_title
    elif lexicalized_authors:
        agens = lexicalized_authors
    agens.change_mode("AGENS")

    temp = gen_tempus("präs")

    if lexeme in ("behandeln", "beschreiben"):
        return create_diamond("", "handlung", lexeme, [temp, agens, patiens])

    # separable verbs: lexeme -> (verb stem, frame)
    separable_verbs = {"eingehen": ("ein-gehen", "ein-X-trans"),
                       "aufgreifen": ("auf-greifen", "auf-X-trans")}
    if lexeme in separable_verbs:
        if lexeme == "eingehen":
            # "eingehen" takes a prepositional object: "auf die Themen ..."
            patiens.insert_subdiamond(1, gen_prep("auf", "zusammenhang"))
        verb_stem, frame = separable_verbs[lexeme]
        aux = create_diamond("AUX", "partverbstamm", verb_stem,
                             [temp, agens, patiens])
        return create_diamond("", "infinitum", frame, [aux])