def build_sign_from_node(element):
    """
    Given the XML element of a CCG sign, builds the internal representation.

    @param element: DOM element of the sign; its first non-text child is the
        category node, and it must contain an "lf" element with at least one
        child giving the semantics.
    @return: a Sign combining the built category and semantics
    @raise GrammarReadError: if no logical form is found, or the semantic
        representation cannot be built from it
    """
    # Assume there's only one child we're interested in (the category)
    child_elements = remove_unwanted_elements(element.childNodes)
    cat_elem = child_elements[0]
    # Get the category structure from the category node
    gramcategory = build_category_from_node(cat_elem)

    lf_elems = element.getElementsByTagName("lf")
    if len(lf_elems) == 0 or \
            len(remove_unwanted_elements(lf_elems[0].childNodes)) == 0:
        # Was Python 2 "raise Exc, msg" syntax: converted to call form,
        # which is valid in both Python 2 and 3
        raise GrammarReadError(
            "No logical form found for entry: "
            "%s. What is syntax without semantics?" % cat_elem.toxml())
        # Steedman, 2010 (private correspondence)
    lf_elem = lf_elems[0]
    lf_children = remove_unwanted_elements(lf_elem.childNodes)
    ## Get semantics from the lf node
    sems = build_lf_from_node(lf_children[0])
    if sems is None:
        raise GrammarReadError(
            "Could not build semantic "
            "representation for %s." % lf_elem.toxml())
    sems = Semantics(sems)
    # Store the full category for this entry
    return Sign(gramcategory, sems)
def build_sign_from_node(element):
    """
    Given the XML element of a CCG sign, builds the internal representation.

    NOTE(review): this is a duplicate of the build_sign_from_node definition
    earlier in the file; consider removing one copy.

    @param element: DOM element of the sign; its first non-text child is the
        category node, and it must contain an "lf" element with at least one
        child giving the semantics.
    @return: a Sign combining the built category and semantics
    @raise GrammarReadError: if no logical form is found, or the semantic
        representation cannot be built from it
    """
    # Assume there's only one child we're interested in (the category)
    child_elements = remove_unwanted_elements(element.childNodes)
    cat_elem = child_elements[0]
    # Get the category structure from the category node
    gramcategory = build_category_from_node(cat_elem)

    lf_elems = element.getElementsByTagName("lf")
    if len(lf_elems) == 0 or \
            len(remove_unwanted_elements(lf_elems[0].childNodes)) == 0:
        # Was Python 2 "raise Exc, msg" syntax: converted to call form,
        # which is valid in both Python 2 and 3
        raise GrammarReadError(
            "No logical form found for entry: "
            "%s. What is syntax without semantics?" % cat_elem.toxml())
        # Steedman, 2010 (private correspondence)
    lf_elem = lf_elems[0]
    lf_children = remove_unwanted_elements(lf_elem.childNodes)
    ## Get semantics from the lf node
    sems = build_lf_from_node(lf_children[0])
    if sems is None:
        raise GrammarReadError(
            "Could not build semantic "
            "representation for %s." % lf_elem.toxml())
    sems = Semantics(sems)
    # Store the full category for this entry
    return Sign(gramcategory, sems)
def build_lf_from_node(elem):
    """
    Given the "lf" node of a lexical entry, builds a logical form
    representing it internally.

    Dispatches on the node's tag name: point, list, leftonto, rightonto,
    now, abstraction, application or variable.

    @return: a LogicalForm built from the node
    @raise GrammarReadError: if the node is malformed or of an unknown type
    """
    # All "raise Exc, msg" statements below were Python 2 syntax: converted
    # to the call form, which is valid in both Python 2 and 3.
    name = elem.nodeName
    if name == "point":
        # A point in the (as yet equally tempered) tonal space,
        # relative to the chord of the chord
        x, y = require_attrs(elem, ["x", "y"])
        x, y = int(x), int(y)
        if not 0 <= x < 4 or not 0 <= y < 3:
            raise GrammarReadError(
                "equal temperament tonal space "
                "points should be between (0,0) and (3,2): got (%d,%d)"
                % (x, y))
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A tonal space point cannot have children.")
        return LexicalCoordinate((x, y))
    elif name == "list":
        # A path of points (usually just one point)
        subnodes = remove_unwanted_elements(elem.childNodes)
        children = [build_lf_from_node(node) for node in subnodes]
        return List(children)
    elif name == "leftonto":
        # A leftonto predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A leftonto predicate cannot have children.")
        return Leftonto()
    elif name == "rightonto":
        # A rightonto predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A rightonto predicate cannot have children.")
        return Rightonto()
    elif name == "now":
        # A now predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A now predicate cannot have children.")
        return Now()
    elif name == "abstraction":
        # Lambda abstraction
        # All children except the last are abstracted variables
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError(
                "No subexpression in lambda "
                "abstraction: %s" % elem.toxml())
        variables = [build_lf_from_node(node) for node in subnodes[:-1]]
        for var in variables:
            if not isinstance(var, Variable):
                raise GrammarReadError(
                    "Can only abstract over "
                    "variables, not %s" % type(var).__name__)
        expression = build_lf_from_node(subnodes[-1])
        return multi_abstract(*tuple(variables + [expression]))
    elif name == "application":
        # Function application
        # Recursively build functor and argument LFs
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError(
                "Function application needs to "
                "have at least two subnodes")
        children = [build_lf_from_node(node) for node in subnodes]
        return multi_apply(*children)
    elif name == "variable":
        # Variable reference
        varid = require_attr(elem, "name")
        return Variable(varid)
    else:
        raise GrammarReadError("Got invalid node %s in LF" % name)
def build_lf_from_node(elem):
    """
    Given the "lf" node of a lexical entry, builds a logical form
    representing it internally.

    Dispatches on the node's tag name: point, list, leftonto, rightonto,
    now, abstraction, application or variable.

    NOTE(review): this is a duplicate of the build_lf_from_node definition
    earlier in the file; consider removing one copy.

    @return: a LogicalForm built from the node
    @raise GrammarReadError: if the node is malformed or of an unknown type
    """
    # All "raise Exc, msg" statements below were Python 2 syntax: converted
    # to the call form, which is valid in both Python 2 and 3.
    name = elem.nodeName
    if name == "point":
        # A point in the (as yet equally tempered) tonal space,
        # relative to the chord of the chord
        x, y = require_attrs(elem, ["x", "y"])
        x, y = int(x), int(y)
        if not 0 <= x < 4 or not 0 <= y < 3:
            raise GrammarReadError(
                "equal temperament tonal space "
                "points should be between (0,0) and (3,2): got (%d,%d)"
                % (x, y))
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A tonal space point cannot have children.")
        return LexicalCoordinate((x, y))
    elif name == "list":
        # A path of points (usually just one point)
        subnodes = remove_unwanted_elements(elem.childNodes)
        children = [build_lf_from_node(node) for node in subnodes]
        return List(children)
    elif name == "leftonto":
        # A leftonto predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A leftonto predicate cannot have children.")
        return Leftonto()
    elif name == "rightonto":
        # A rightonto predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A rightonto predicate cannot have children.")
        return Rightonto()
    elif name == "now":
        # A now predicate literal
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError(
                "A now predicate cannot have children.")
        return Now()
    elif name == "abstraction":
        # Lambda abstraction
        # All children except the last are abstracted variables
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError(
                "No subexpression in lambda "
                "abstraction: %s" % elem.toxml())
        variables = [build_lf_from_node(node) for node in subnodes[:-1]]
        for var in variables:
            if not isinstance(var, Variable):
                raise GrammarReadError(
                    "Can only abstract over "
                    "variables, not %s" % type(var).__name__)
        expression = build_lf_from_node(subnodes[-1])
        return multi_abstract(*tuple(variables + [expression]))
    elif name == "application":
        # Function application
        # Recursively build functor and argument LFs
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError(
                "Function application needs to "
                "have at least two subnodes")
        children = [build_lf_from_node(node) for node in subnodes]
        return multi_apply(*children)
    elif name == "variable":
        # Variable reference
        varid = require_attr(elem, "name")
        return Variable(varid)
    else:
        raise GrammarReadError("Got invalid node %s in LF" % name)
def __init__(self, grammar_name=None):
    """
    Creates a new grammar by reading from an XML grammar file.

    Words (morph items) are stored in morph_items. Families (lexical
    families) are stored in families.

    Instantiate this directly only if you want, for some reason, to be
    sure of getting a new instance of Grammar. Most of the time, you can
    load a named grammar using L{get_grammar}, which will cache already
    loaded grammars and return the same instance again if you ask for
    the same name.

    @type grammar_name: string
    @param grammar_name: name of the grammar definition to be loaded.
        Call L{get_grammar_names} for a list of available grammars.
        If None, loads the default grammar.
    @raise GrammarReadError: if any part of the grammar definition is
        inconsistent (undefined literal function, undefined POS, unknown
        or duplicated rule, non-unary lexical expansion rule).
    """
    # NOTE: all raises below were Python 2 "raise Exc, msg" syntax;
    # converted to the call form, valid in both Python 2 and 3.
    if grammar_name is None:
        grammar_name = settings.DEFAULT_GRAMMAR
    self.name = grammar_name
    filename_base = os.path.join(settings.GRAMMAR_DATA_DIR, grammar_name)
    self.grammar_file = os.path.join(filename_base, "grammar.xml")
    # Read in the grammar
    logger.debug("Grammar: %s" % self.grammar_file)
    # Read in the XML from the file
    self.grammar_dom = xml.dom.minidom.parse(self.grammar_file)
    grammar_tag = get_single_element_by_tag_name(self.grammar_dom, "grammar")

    # Get a named formalism, or the default one
    formalism_attr = grammar_tag.attributes.getNamedItem("formalism")
    if formalism_attr is None:
        formalism = get_default_formalism()
    else:
        formalism_name = str(formalism_attr.value)
        try:
            formalism = get_formalism(formalism_name)
        except FormalismLoadError:
            logger.error("The formalism '%s' does not exist. Possible "
                         "formalisms are: %s"
                         % (formalism_name, ", ".join(FORMALISMS)))
            raise
    self.formalism = formalism

    ###############################
    ### Reading in the lexicon
    lex_tag = get_single_element_by_tag_name(self.grammar_dom, "lexicon")
    lexicon_file = os.path.join(
        filename_base, lex_tag.attributes.getNamedItem("file").value)
    logger.debug("Lexicon: %s" % lexicon_file)
    # Read in the lexicon
    self.lexicon_dom = xml.dom.minidom.parse(lexicon_file)

    ###############################
    ### Reading in the words
    morph_tag = get_single_element_by_tag_name(self.grammar_dom, "morphology")
    morph_file = os.path.join(
        filename_base, morph_tag.attributes.getNamedItem("file").value)
    logger.debug("Morphology: %s" % morph_file)
    # Read in the morphology
    self.morph_dom = xml.dom.minidom.parse(morph_file)

    ###############################
    ### Reading in the rules
    rules_tag = get_single_element_by_tag_name(self.grammar_dom, "rules")
    rules_file = os.path.join(
        filename_base, rules_tag.attributes.getNamedItem("file").value)
    logger.debug("Rules: %s" % rules_file)
    # Read in the rules
    self.rules_dom = xml.dom.minidom.parse(rules_file)

    ###############################
    ### Reading in the functions list (only used for certain formalisms)
    functions_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                   "functions",
                                                   optional=True)
    self.literal_functions = {}
    available_funs = formalism.literal_functions
    if functions_tag is not None:
        functions_file = os.path.join(
            filename_base,
            functions_tag.attributes.getNamedItem("file").value)
        logger.debug("Functions: %s" % functions_file)
        # Read in the functions from the XML
        functions_dom = xml.dom.minidom.parse(functions_file)
        functions_xml = get_single_element_by_tag_name(
            functions_dom, "functions")
        functions = remove_unwanted_elements(
            functions_xml.getElementsByTagName("function"))
        # Try adding each of the functions, using the formalism's definitions
        for func_el in functions:
            func_name = func_el.attributes.getNamedItem("name").value
            if func_name in available_funs:
                lit_fun = available_funs[func_name]
                self.literal_functions[lit_fun.name] = lit_fun
            else:
                # BUG FIX: the format string has two %s placeholders but
                # was given only formalism.get_name(); supply both args
                raise GrammarReadError(
                    "The literal function \"%s\" is not defined in the "
                    "code for the %s formalism."
                    % (func_name, formalism.get_name()))

    ###############################
    ### Reading in the modality hierarchy
    modalities_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                    "modalities",
                                                    optional=True)
    if modalities_tag is not None:
        modalities_file = os.path.join(
            filename_base,
            modalities_tag.attributes.getNamedItem("file").value)
        logger.debug("Modalities: %s" % modalities_file)
        # Read in the modalities
        self.modalities_dom = get_single_element_by_tag_name(
            xml.dom.minidom.parse(modalities_file), "modalities")
    else:
        self.modalities_dom = None

    ###############################
    ### Read in grammar-level meta data
    attrs = self.grammar_dom.getElementsByTagName("attr")
    # Initialize values that might not get set
    self.max_categories = None
    # Read in the values from the XML
    for el in attrs:
        name = el.getAttribute("name")
        value = el.getAttribute("value")
        # Check for all the attributes we recognize
        if name == "max_categories":
            self.max_categories = int(value)

    ###############################
    ### Prepare the morph word classes
    self.chord_classes = {}
    for entry in self.morph_dom.getElementsByTagName("class"):
        chord_class = ChordClass.from_dom(entry)
        self.chord_classes[chord_class.name] = chord_class
    # Maybe handle macros here. Not currently using them.

    ###############################
    ### Prepare lexical entries
    # Use a hash table for this too, indexed by pos
    self.families = {}
    self.inactive_families = []
    for family in self.lexicon_dom.getElementsByTagName("family"):
        fam = Family.from_dom(formalism, family)
        # Check whether the family has any entries and don't use it if not
        if len(fam.entries) > 0:
            # Put a new Family in the table for every family entry
            if fam.pos in self.families:
                # Already an entry for this POS: add to the list
                self.families[fam.pos].append(fam)
            else:
                # No occurence of this POS yet: add a new list
                self.families[fam.pos] = [fam]
        else:
            self.inactive_families.append(fam.pos)

    ###############################
    ### Prepare the morph items
    self.morphs = []
    for entry in self.morph_dom.getElementsByTagName("entry"):
        morph = MorphItem.from_dom(formalism, entry, self.chord_classes)
        self.morphs.append(morph)
    # Check that all the morphs correspond to a defined POS
    for morph in self.morphs:
        if morph.pos not in self.families:
            raise GrammarReadError(
                "morph item refers to undefined "
                "part-of-speech '%s': %s" % (morph.pos,
                                             morph.element.toxml()))

    ###############################
    ### Prepare modalities hierarchy
    if self.modalities_dom:
        self.modality_tree = ModalityTree.from_dom(self.modalities_dom)
    else:
        # The modalities that existed before they were added to the
        # XML spec were just "c" and "."
        self.modality_tree = ModalityTree(
            [ModalityTreeNode("", [ModalityTreeNode("c")])])

    ###############################
    ### Prepare rules
    self.rules = []
    # Go through each different type of rule and add appropriate Rule subclasses
    rule_block = get_single_element_by_tag_name(self.rules_dom, "rules")
    for rule_tag in remove_unwanted_elements(rule_block.childNodes):
        rulename = rule_tag.tagName
        if rulename == "lexrules":
            # We'll deal with these later
            continue
        if rulename not in self.formalism.rules:
            raise GrammarReadError(
                "unknown rule '%s' (formalism "
                "defines: %s)" % (rulename,
                                  ", ".join(formalism.rules.keys())))
        ruleclass = self.formalism.rules[rulename]
        # Instantiate the rule, using options from the XML
        self.rules.append(
            ruleclass(modalities=self.modality_tree,
                      grammar=self,
                      **attrs_to_dict(rule_tag.attributes)))
    # Keep rules sorted by arity for ease of access
    self.unary_rules = []
    self.binary_rules = []
    for rule in self.rules:
        if rule.arity == 1:
            self.unary_rules.append(rule)
        elif rule.arity == 2:
            self.binary_rules.append(rule)
    # Index rules by internal name for ease of access
    self.rules_by_name = {}
    for rule in self.rules:
        if rule.internal_name in self.rules_by_name:
            # This shouldn't happen: each rule name should only be used once
            raise GrammarReadError(
                "instantiated two rules with the same "
                "internal name: %s. Either the XML has mistakenly "
                "instantiated the same thing twice, or the rule class has "
                "failed to give different varieties of the rule different "
                "names" % rule.internal_name)
        self.rules_by_name[rule.internal_name] = rule

    # Optionally read in a lexrules element and expand the lexicon
    # using its entries
    self.lexical_rules = []
    lexrules_tag = get_single_element_by_tag_name(self.rules_dom, "lexrules",
                                                  optional=True)
    if lexrules_tag is not None:
        for rule_tag in remove_unwanted_elements(lexrules_tag.childNodes):
            rulename = rule_tag.tagName
            if rulename not in self.formalism.rules:
                raise GrammarReadError(
                    "unknown lexical expansion "
                    "rule '%s' (formalism defines: %s)"
                    % (rulename, ", ".join(formalism.rules.keys())))
            ruleclass = self.formalism.rules[rulename]
            attrs = attrs_to_dict(rule_tag.attributes)
            # Make sure expanded category has a suffix to put on
            # POSs. If one isn't given, set a default.
            if "pos_suffix" in attrs:
                pos_suffix = attrs["pos_suffix"]
                del attrs["pos_suffix"]
            else:
                pos_suffix = "_Rep"
            # Instantiate the rule, using any options given
            rule = ruleclass(modalities=self.modality_tree, grammar=self,
                             **attrs)
            rule.pos_suffix = pos_suffix
            # Can only use unary rules - check this one is
            if rule.arity != 1:
                # BUG FIX: original raised a bare string, which is a
                # TypeError at runtime; raise a proper exception type
                raise GrammarReadError(
                    "can only use unary rules as lexical "
                    "expansions. Tried to use %s, which has arity "
                    "%d." % (rulename, rule.arity))
            self.lexical_rules.append(rule)

    # Use each lexical rule to expand the lexicon
    for rule in self.lexical_rules:
        for fam in sum(self.families.values(), []):
            for entry in fam.entries:
                # Try apply the expansion rule to this entry
                new_signs = rule.apply_rule([entry.sign])
                if new_signs is not None and len(new_signs) > 0:
                    # Make a new POS for this expanded category
                    new_pos = "%s%s" % (fam.pos, rule.pos_suffix)
                    new_entries = [
                        EntriesItem(self.formalism, "Expanded", new_sign)
                        for new_sign in new_signs]
                    new_family = Family(self.formalism, new_pos, new_pos,
                                        new_entries,
                                        chordfn=fam.chordfn,
                                        expanded=rule.internal_name)
                    self.families.setdefault(new_pos, []).append(new_family)
                    # Also create morph items for each of those
                    # that referenced the old unexpanded rules
                    for morph in [m for m in self.morphs
                                  if m.pos == fam.pos]:
                        self.morphs.append(
                            MorphItem(self.formalism,
                                      copy.deepcopy(morph.words),
                                      new_pos,
                                      optional_minor=morph.optional_minor,
                                      chord_class=morph.chord_class))

    ###############
    # Index the morph items by word to make lookup easier
    self.morph_items = {}
    for morph in self.morphs:
        # If the pos is completely inactive in the lexicon, ignore this morph
        if not morph.pos in self.inactive_families:
            # Go through each of this morph's words
            for word in morph.words:
                # Put a new MorphItem in the table for every entry
                if word in self.morph_items:
                    # Already a list for this word: add to it
                    self.morph_items[word].append(morph)
                else:
                    # First occurence of this word: add a new list
                    self.morph_items[word] = [morph]

    ###############
    # Read in an equivalence map if one is given for morph entries
    equiv_map_el = get_single_element_by_tag_name(self.morph_dom, "equivmap",
                                                  optional=True)
    if equiv_map_el is not None:
        self.equiv_map = EquivalenceMap.from_dom(formalism, equiv_map_el,
                                                 self.chord_classes,
                                                 self.morphs)
    else:
        self.equiv_map = EquivalenceMap()

    ###########
    # Prepare a version of the family list for MIDI input
    self.midi_families = {}
    for pos, fams in self.families.items():
        new_fams = []
        for fam in fams:
            # Exclude any generated by lexical expansions, unless they're
            # tonic function
            if fam.expanded is not None and fam.chordfn != "T":
                continue
            new_fams.append(fam)
        if new_fams:
            # Exclude any that are mapped onto another entry by an
            # equivalence mapping that changes the root
            if pos in self.equiv_map:
                continue
            self.midi_families[pos] = new_fams

    ####### Debugging output
    logger.debug("Read the following information from the grammar:")
    logger.debug("Morphology:")
    logger.debug("\n".join(
        ["%s: %s" % (word,
                     ", ".join(["%s" % item.pos for item in items]))
         for word, items in self.morph_items.items()]))
    logger.debug("Lexicon:")
    logger.debug("\n".join(
        [", ".join(["%s" % initem for initem in item])
         for item in self.families.values()]))
    logger.debug("Rules:")
    logger.debug("\n".join(["  %s" % item for item in self.rules]))
    logger.debug("Lexical expansion rules:")
    logger.debug("\n".join(["  %s" % item for item in self.lexical_rules]))
    logger.debug("Modalities:")
    logger.debug("%s" % self.modality_tree)
    if len(self.literal_functions):
        logger.debug("Literal functions:")
        logger.debug("\n".join(
            ["  %s: %s" % (name, val)
             for (name, val) in self.literal_functions.items()]))
def __init__(self, grammar_name=None):
    """
    Creates a new grammar by reading from an XML grammar file.

    Words (morph items) are stored in morph_items. Families (lexical
    families) are stored in families.

    Instantiate this directly only if you want, for some reason, to be
    sure of getting a new instance of Grammar. Most of the time, you can
    load a named grammar using L{get_grammar}, which will cache already
    loaded grammars and return the same instance again if you ask for
    the same name.

    NOTE(review): this is a duplicate of the __init__ definition earlier
    in the file; consider removing one copy.

    @type grammar_name: string
    @param grammar_name: name of the grammar definition to be loaded.
        Call L{get_grammar_names} for a list of available grammars.
        If None, loads the default grammar.
    @raise GrammarReadError: if any part of the grammar definition is
        inconsistent (undefined literal function, undefined POS, unknown
        or duplicated rule, non-unary lexical expansion rule).
    """
    # NOTE: all raises below were Python 2 "raise Exc, msg" syntax;
    # converted to the call form, valid in both Python 2 and 3.
    if grammar_name is None:
        grammar_name = settings.DEFAULT_GRAMMAR
    self.name = grammar_name
    filename_base = os.path.join(settings.GRAMMAR_DATA_DIR, grammar_name)
    self.grammar_file = os.path.join(filename_base, "grammar.xml")
    # Read in the grammar
    logger.debug("Grammar: %s" % self.grammar_file)
    # Read in the XML from the file
    self.grammar_dom = xml.dom.minidom.parse(self.grammar_file)
    grammar_tag = get_single_element_by_tag_name(self.grammar_dom, "grammar")

    # Get a named formalism, or the default one
    formalism_attr = grammar_tag.attributes.getNamedItem("formalism")
    if formalism_attr is None:
        formalism = get_default_formalism()
    else:
        formalism_name = str(formalism_attr.value)
        try:
            formalism = get_formalism(formalism_name)
        except FormalismLoadError:
            logger.error("The formalism '%s' does not exist. Possible "
                         "formalisms are: %s"
                         % (formalism_name, ", ".join(FORMALISMS)))
            raise
    self.formalism = formalism

    ###############################
    ### Reading in the lexicon
    lex_tag = get_single_element_by_tag_name(self.grammar_dom, "lexicon")
    lexicon_file = os.path.join(
        filename_base, lex_tag.attributes.getNamedItem("file").value)
    logger.debug("Lexicon: %s" % lexicon_file)
    # Read in the lexicon
    self.lexicon_dom = xml.dom.minidom.parse(lexicon_file)

    ###############################
    ### Reading in the words
    morph_tag = get_single_element_by_tag_name(self.grammar_dom, "morphology")
    morph_file = os.path.join(
        filename_base, morph_tag.attributes.getNamedItem("file").value)
    logger.debug("Morphology: %s" % morph_file)
    # Read in the morphology
    self.morph_dom = xml.dom.minidom.parse(morph_file)

    ###############################
    ### Reading in the rules
    rules_tag = get_single_element_by_tag_name(self.grammar_dom, "rules")
    rules_file = os.path.join(
        filename_base, rules_tag.attributes.getNamedItem("file").value)
    logger.debug("Rules: %s" % rules_file)
    # Read in the rules
    self.rules_dom = xml.dom.minidom.parse(rules_file)

    ###############################
    ### Reading in the functions list (only used for certain formalisms)
    functions_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                   "functions",
                                                   optional=True)
    self.literal_functions = {}
    available_funs = formalism.literal_functions
    if functions_tag is not None:
        functions_file = os.path.join(
            filename_base,
            functions_tag.attributes.getNamedItem("file").value)
        logger.debug("Functions: %s" % functions_file)
        # Read in the functions from the XML
        functions_dom = xml.dom.minidom.parse(functions_file)
        functions_xml = get_single_element_by_tag_name(
            functions_dom, "functions")
        functions = remove_unwanted_elements(
            functions_xml.getElementsByTagName("function"))
        # Try adding each of the functions, using the formalism's definitions
        for func_el in functions:
            func_name = func_el.attributes.getNamedItem("name").value
            if func_name in available_funs:
                lit_fun = available_funs[func_name]
                self.literal_functions[lit_fun.name] = lit_fun
            else:
                # BUG FIX: the format string has two %s placeholders but
                # was given only formalism.get_name(); supply both args
                raise GrammarReadError(
                    "The literal function \"%s\" is not defined in the "
                    "code for the %s formalism."
                    % (func_name, formalism.get_name()))

    ###############################
    ### Reading in the modality hierarchy
    modalities_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                    "modalities",
                                                    optional=True)
    if modalities_tag is not None:
        modalities_file = os.path.join(
            filename_base,
            modalities_tag.attributes.getNamedItem("file").value)
        logger.debug("Modalities: %s" % modalities_file)
        # Read in the modalities
        self.modalities_dom = get_single_element_by_tag_name(
            xml.dom.minidom.parse(modalities_file), "modalities")
    else:
        self.modalities_dom = None

    ###############################
    ### Read in grammar-level meta data
    attrs = self.grammar_dom.getElementsByTagName("attr")
    # Initialize values that might not get set
    self.max_categories = None
    # Read in the values from the XML
    for el in attrs:
        name = el.getAttribute("name")
        value = el.getAttribute("value")
        # Check for all the attributes we recognize
        if name == "max_categories":
            self.max_categories = int(value)

    ###############################
    ### Prepare the morph word classes
    self.chord_classes = {}
    for entry in self.morph_dom.getElementsByTagName("class"):
        chord_class = ChordClass.from_dom(entry)
        self.chord_classes[chord_class.name] = chord_class
    # Maybe handle macros here. Not currently using them.

    ###############################
    ### Prepare lexical entries
    # Use a hash table for this too, indexed by pos
    self.families = {}
    self.inactive_families = []
    for family in self.lexicon_dom.getElementsByTagName("family"):
        fam = Family.from_dom(formalism, family)
        # Check whether the family has any entries and don't use it if not
        if len(fam.entries) > 0:
            # Put a new Family in the table for every family entry
            if fam.pos in self.families:
                # Already an entry for this POS: add to the list
                self.families[fam.pos].append(fam)
            else:
                # No occurence of this POS yet: add a new list
                self.families[fam.pos] = [fam]
        else:
            self.inactive_families.append(fam.pos)

    ###############################
    ### Prepare the morph items
    self.morphs = []
    for entry in self.morph_dom.getElementsByTagName("entry"):
        morph = MorphItem.from_dom(formalism, entry, self.chord_classes)
        self.morphs.append(morph)
    # Check that all the morphs correspond to a defined POS
    for morph in self.morphs:
        if morph.pos not in self.families:
            raise GrammarReadError(
                "morph item refers to undefined "
                "part-of-speech '%s': %s" % (morph.pos,
                                             morph.element.toxml()))

    ###############################
    ### Prepare modalities hierarchy
    if self.modalities_dom:
        self.modality_tree = ModalityTree.from_dom(self.modalities_dom)
    else:
        # The modalities that existed before they were added to the
        # XML spec were just "c" and "."
        self.modality_tree = ModalityTree(
            [ModalityTreeNode("", [ModalityTreeNode("c")])])

    ###############################
    ### Prepare rules
    self.rules = []
    # Go through each different type of rule and add appropriate Rule subclasses
    rule_block = get_single_element_by_tag_name(self.rules_dom, "rules")
    for rule_tag in remove_unwanted_elements(rule_block.childNodes):
        rulename = rule_tag.tagName
        if rulename == "lexrules":
            # We'll deal with these later
            continue
        if rulename not in self.formalism.rules:
            raise GrammarReadError(
                "unknown rule '%s' (formalism "
                "defines: %s)" % (rulename,
                                  ", ".join(formalism.rules.keys())))
        ruleclass = self.formalism.rules[rulename]
        # Instantiate the rule, using options from the XML
        self.rules.append(
            ruleclass(modalities=self.modality_tree,
                      grammar=self,
                      **attrs_to_dict(rule_tag.attributes)))
    # Keep rules sorted by arity for ease of access
    self.unary_rules = []
    self.binary_rules = []
    for rule in self.rules:
        if rule.arity == 1:
            self.unary_rules.append(rule)
        elif rule.arity == 2:
            self.binary_rules.append(rule)
    # Index rules by internal name for ease of access
    self.rules_by_name = {}
    for rule in self.rules:
        if rule.internal_name in self.rules_by_name:
            # This shouldn't happen: each rule name should only be used once
            raise GrammarReadError(
                "instantiated two rules with the same "
                "internal name: %s. Either the XML has mistakenly "
                "instantiated the same thing twice, or the rule class has "
                "failed to give different varieties of the rule different "
                "names" % rule.internal_name)
        self.rules_by_name[rule.internal_name] = rule

    # Optionally read in a lexrules element and expand the lexicon
    # using its entries
    self.lexical_rules = []
    lexrules_tag = get_single_element_by_tag_name(self.rules_dom, "lexrules",
                                                  optional=True)
    if lexrules_tag is not None:
        for rule_tag in remove_unwanted_elements(lexrules_tag.childNodes):
            rulename = rule_tag.tagName
            if rulename not in self.formalism.rules:
                raise GrammarReadError(
                    "unknown lexical expansion "
                    "rule '%s' (formalism defines: %s)"
                    % (rulename, ", ".join(formalism.rules.keys())))
            ruleclass = self.formalism.rules[rulename]
            attrs = attrs_to_dict(rule_tag.attributes)
            # Make sure expanded category has a suffix to put on
            # POSs. If one isn't given, set a default.
            if "pos_suffix" in attrs:
                pos_suffix = attrs["pos_suffix"]
                del attrs["pos_suffix"]
            else:
                pos_suffix = "_Rep"
            # Instantiate the rule, using any options given
            rule = ruleclass(modalities=self.modality_tree, grammar=self,
                             **attrs)
            rule.pos_suffix = pos_suffix
            # Can only use unary rules - check this one is
            if rule.arity != 1:
                # BUG FIX: original raised a bare string, which is a
                # TypeError at runtime; raise a proper exception type
                raise GrammarReadError(
                    "can only use unary rules as lexical "
                    "expansions. Tried to use %s, which has arity "
                    "%d." % (rulename, rule.arity))
            self.lexical_rules.append(rule)

    # Use each lexical rule to expand the lexicon
    for rule in self.lexical_rules:
        for fam in sum(self.families.values(), []):
            for entry in fam.entries:
                # Try apply the expansion rule to this entry
                new_signs = rule.apply_rule([entry.sign])
                if new_signs is not None and len(new_signs) > 0:
                    # Make a new POS for this expanded category
                    new_pos = "%s%s" % (fam.pos, rule.pos_suffix)
                    new_entries = [
                        EntriesItem(self.formalism, "Expanded", new_sign)
                        for new_sign in new_signs]
                    new_family = Family(self.formalism, new_pos, new_pos,
                                        new_entries,
                                        chordfn=fam.chordfn,
                                        expanded=rule.internal_name)
                    self.families.setdefault(new_pos, []).append(new_family)
                    # Also create morph items for each of those
                    # that referenced the old unexpanded rules
                    for morph in [m for m in self.morphs
                                  if m.pos == fam.pos]:
                        self.morphs.append(
                            MorphItem(self.formalism,
                                      copy.deepcopy(morph.words),
                                      new_pos,
                                      optional_minor=morph.optional_minor,
                                      chord_class=morph.chord_class))

    ###############
    # Index the morph items by word to make lookup easier
    self.morph_items = {}
    for morph in self.morphs:
        # If the pos is completely inactive in the lexicon, ignore this morph
        if not morph.pos in self.inactive_families:
            # Go through each of this morph's words
            for word in morph.words:
                # Put a new MorphItem in the table for every entry
                if word in self.morph_items:
                    # Already a list for this word: add to it
                    self.morph_items[word].append(morph)
                else:
                    # First occurence of this word: add a new list
                    self.morph_items[word] = [morph]

    ###############
    # Read in an equivalence map if one is given for morph entries
    equiv_map_el = get_single_element_by_tag_name(self.morph_dom, "equivmap",
                                                  optional=True)
    if equiv_map_el is not None:
        self.equiv_map = EquivalenceMap.from_dom(formalism, equiv_map_el,
                                                 self.chord_classes,
                                                 self.morphs)
    else:
        self.equiv_map = EquivalenceMap()

    ###########
    # Prepare a version of the family list for MIDI input
    self.midi_families = {}
    for pos, fams in self.families.items():
        new_fams = []
        for fam in fams:
            # Exclude any generated by lexical expansions, unless they're
            # tonic function
            if fam.expanded is not None and fam.chordfn != "T":
                continue
            new_fams.append(fam)
        if new_fams:
            # Exclude any that are mapped onto another entry by an
            # equivalence mapping that changes the root
            if pos in self.equiv_map:
                continue
            self.midi_families[pos] = new_fams

    ####### Debugging output
    logger.debug("Read the following information from the grammar:")
    logger.debug("Morphology:")
    logger.debug("\n".join(
        ["%s: %s" % (word,
                     ", ".join(["%s" % item.pos for item in items]))
         for word, items in self.morph_items.items()]))
    logger.debug("Lexicon:")
    logger.debug("\n".join(
        [", ".join(["%s" % initem for initem in item])
         for item in self.families.values()]))
    logger.debug("Rules:")
    logger.debug("\n".join(["  %s" % item for item in self.rules]))
    logger.debug("Lexical expansion rules:")
    logger.debug("\n".join(["  %s" % item for item in self.lexical_rules]))
    logger.debug("Modalities:")
    logger.debug("%s" % self.modality_tree)
    if len(self.literal_functions):
        logger.debug("Literal functions:")
        logger.debug("\n".join(
            ["  %s: %s" % (name, val)
             for (name, val) in self.literal_functions.items()]))