def to_dot_str_graph(self):
    """Render self.G as a graphviz dot string.

    Node ids are the cleaned printnames with '=' mapped to '_'; the
    label keeps only the next-to-last '_'-separated field of the id
    when the id contains '_'.  Node and edge lines are sorted so the
    output is deterministic.
    """
    out = [u'digraph finite_state_machine {', '\tdpi=100;']
    node_entries = []
    for node in self.G.nodes():
        node_id = Machine.d_clean(node).replace('=', '_')
        # use the next-to-last field of the id as the label, if any
        if "_" in node_id:
            label = node_id.split('_')[-2]
        else:
            label = node_id
        node_entries.append(u'\t{0} [shape = circle, label = "{1}"];'.format(
            node_id, label).replace('-', '_'))
    out.extend(sorted(node_entries))
    edge_entries = []
    for src, tgt, attrs in self.G.edges(data=True):
        src_id = Machine.d_clean(src).replace('=', '_')
        tgt_id = Machine.d_clean(tgt).replace('=', '_')
        edge_entries.append(u'\t{0} -> {1} [ label = "{2}" ];'.format(
            src_id, tgt_id, attrs['color']))
    out.extend(sorted(edge_entries))
    out.append('}')
    return u'\n'.join(out)
def parse(self, sentence):
    """
    input sentence is a list of tokens and chunk in the following format:
    [(token1_tag1, token1_tag2, token1_tagX, ...),
     ([(tokeninchunk1_tag1, tokeninchunk1_tagX, ...), ...], case_of_chunk),
     (token_out_of_chunk_again_tagX, ...),
     ...]
    output is a list of machines

    Fixes: removed a leftover debug ``print`` statement and replaced the
    ``type(...) == list`` comparison with ``isinstance``.
    """
    machines = []
    for token_or_chunk in sentence:
        # chunk or token?  Chunks carry a list of analyses first.
        if isinstance(token_or_chunk[0], list):
            # chunk: (list of (token, analysis) pairs, case)
            chunk, _ = token_or_chunk
            machines.append([
                Machine(analysis.split("/")[0], Control(analysis))
                for _, analysis in chunk])
        else:
            # single token outside any chunk
            _, analysis = token_or_chunk
            machines.append(
                [Machine(analysis.split("/")[0], Control(analysis))])
    return machines
def to_dot_str_graph(self):
    """Serialize self.G to the graphviz dot language.

    Ids come from Machine.d_clean with '=' replaced by '_'; labels
    take the next-to-last '_'-separated piece of the id when present.
    All node and edge lines are emitted in sorted order for
    reproducibility.
    """
    result = [u'digraph finite_state_machine {', '\tdpi=100;']
    node_strs = []
    for vertex in self.G.nodes():
        vertex_id = Machine.d_clean(vertex).replace('=', '_')
        label = vertex_id.split('_')[-2] if "_" in vertex_id else vertex_id
        node_strs.append(u'\t{0} [shape = circle, label = "{1}"];'.format(
            vertex_id, label).replace('-', '_'))
    result += sorted(node_strs)
    edge_strs = []
    for head, tail, attrs in self.G.edges(data=True):
        head_id = Machine.d_clean(head).replace('=', '_')
        tail_id = Machine.d_clean(tail).replace('=', '_')
        edge_strs.append(u'\t{0} -> {1} [ label = "{2}" ];'.format(
            head_id, tail_id, attrs['color']))
    result += sorted(edge_strs)
    result.append('}')
    return u'\n'.join(result)
def act(self, seq):
    """Combine two machines of @p seq into a new binary relation machine.

    The machines at self.first_pos and self.second_pos are appended to
    partitions 1 and 2 of a fresh machine named self.bin_rel.

    @return a one-element list holding the new relation machine.
    """
    relation = Machine(self.bin_rel)
    relation.append(seq[self.first_pos], 1)
    relation.append(seq[self.second_pos], 2)
    return [relation]
def get_machine(self, printname, new_machine=False, allow_new_base=False, allow_new_ext=False, allow_new_oov=True): """returns the lowest level (base < ext < oov) existing machine for the printname. If none exist, creates a new machine in the lowest level allowed by the allow_* flags. Will always create new machines for uppercase printnames""" # returns a new machine without adding it to any lexicon if new_machine: return Machine(printname, ConceptControl()) # TODO if not printname: return self.get_machine("_empty_") if printname.isupper(): # return self.get_machine(printname, new_machine=True) return self.get_machine(printname=printname.lower(), new_machine=new_machine, allow_new_base=allow_new_base, allow_new_ext=allow_new_ext, allow_new_oov=allow_new_oov) machines = self.lexicon.get( printname, self.ext_lexicon.get(printname, self.oov_lexicon.get(printname, set()))) if len(machines) == 0: # logging.info( # u'creating new machine for unknown word: "{0}"'.format( # printname)) new_machine = Machine(printname, ConceptControl()) if allow_new_base: self.add(printname, new_machine, external=False) elif allow_new_ext: self.add(printname, new_machine) elif allow_new_oov: self.add(printname, new_machine, oov=True) else: return None return self.get_machine(printname) else: if len(machines) > 1: debug_str = u'ambiguous printname: {0}, machines: {1}'.format( printname, [ lex.get(printname, set([])) for lex in (self.lexicon, self.ext_lexicon, self.oov_lexicon) ]) raise Exception(debug_str) return next(iter(machines))
def create_from_dumps(machines_dump, ext_machines_dump, primitives, cfg):
    """builds the lexicon from dumps created by Lexicon.dump_machines"""
    lexicon = Lexicon(cfg)
    lexicon.primitives = primitives
    # load base definitions first, then the external ones
    for external, dump in ((False, machines_dump),
                           (True, ext_machines_dump)):
        for word, dumped_def_graph in dump.iteritems():
            machine = Machine(word, ConceptControl())
            lexicon.add_def_graph(word, machine, dumped_def_graph)
            lexicon.add(word, machine, external=external)
    return lexicon
def activate(self):
    """Finds and returns the machines that should be activated by the
    machines already active. These machines are automatically added
    to self.active as well

    When exactly a machine should be activated is still up for
    consideration; however, currently this method returns a machine
    if all non-primitive machines on its partitions are active."""
    activated = []
    for printname, static_machines in self.static.iteritems():
        for static_machine in static_machines:
            if printname in self.active:
                # already active, nothing to do
                continue
            has_machine = False
            # scan every machine on every partition; break as soon as a
            # non-primitive one (printname not starting with '#') is
            # found that is not active yet
            for machine in chain(*static_machine.partitions):
                has_machine = True
                if (not unicode(machine).startswith(u'#') and
                        unicode(machine) not in self.active):
                    break
            else:
                # no break: every partition machine is active or primitive;
                # only activate if there was at least one machine at all
                if has_machine:
                    m = Machine(printname,
                                copy.copy(static_machine.control))
                    self.add_active(m)
                    activated.append(m)
    return activated
def draw_single_graph(self, word, path):
    """Write one dot file per definition of @p word into directory @p path.

    Files are named <cleaned_word>_<index>.dot and written UTF-8 encoded.
    """
    clean_word = Machine.d_clean(word)
    for index, machine in enumerate(self.definitions[word]):
        dot_text = MachineGraph.create_from_machines([machine]).to_dot()
        target = os.path.join(
            path, '{0}_{1}.dot'.format(clean_word, index))
        with open(target, 'w') as out:
            out.write(dot_text.encode('utf-8'))
def __build_definition_graph(self, root_def_m, static_m, def_graph, stop,
                             canonicals, deep_cases):
    """
    Walks through the machines reachable from @p static_m, and adds a
    reference to the corresponding canonical machines to the definition
    graph node (@p root_def_m).
    """
    for static_child in static_m.children():
        if not static_child.fancy():
            # ordinary word: link root_def_m to the definition-graph node
            # of its canonical machine (avoiding self-loops)
            cname = self.get_static_machine(
                static_child.printname())[0].printname()
            def_child = def_graph[cname][0]
            if def_child != root_def_m:
                root_def_m.append(def_child)
        elif (deep_cases and static_child.deep_case() and
              static_child.printname() not in stop):
            # deep case requested on the output: add it as a fresh machine
            root_def_m.append(Machine(static_child.printname()))
        if static_child.fancy() or static_child not in canonicals:
            # deep cases are added by their printname to stop, because as
            # of yet, hash is id-based for machines
            if (static_child not in stop and
                    static_child.printname() not in stop):
                if static_child.fancy():
                    stop.add(static_child.printname())
                else:
                    stop.add(static_child)
                # recurse below the non-canonical child, still collecting
                # into the same root node (the graph is "flattened")
                self.__build_definition_graph(root_def_m, static_child,
                                              def_graph, stop, canonicals,
                                              deep_cases)
def test():
    """Smoke test: two machines referencing each other survive a copy."""
    vonat = Machine("vonat")
    tb = Machine("tb")
    vonat.append(tb)
    tb.append(vonat)
    duplicate = copy(vonat)
    assert duplicate
def create_machine(self, name, partitions):
    """Create a concept machine named @p name with @p partitions partitions.

    @p name may arrive as a list of strings (e.g. ["=", "AGT"]); the
    pieces are then joined.  Plural forms listed in self.plur_dict are
    canonized to their singular, with a 'more' machine appended to
    partition 0 to mark plurality.
    """
    # lists are accepted because of ["=", "AGT"]
    if type(name) is list:
        name = "".join(name)
    # HACK until we find a good solution for defaults
    name = name.strip('<>')
    if name in self.plur_dict:
        machine = Machine(decode_from_proszeky(self.plur_dict[name]),
                          ConceptControl(), partitions)
        machine.append(self.create_machine('more', 1), 0)
    else:
        machine = Machine(decode_from_proszeky(name),
                          ConceptControl(), partitions)
    return machine
def draw_word_graphs(self):
    """Draw a dot graph for every definition of every word into
    graphs/words/; progress is logged every 1000 words.

    Fix: output files are now opened via ``with`` so the handles are
    closed deterministically (the original leaked every file object).
    """
    ensure_dir('graphs/words')
    for c, (word, machines) in enumerate(self.definitions.iteritems()):
        if c % 1000 == 0:
            logging.info("{0}...".format(c))
        for i, machine in enumerate(machines):
            graph = MachineGraph.create_from_machines([machine])
            clean_word = Machine.d_clean(word)
            # d_clean may prefix names with 'X'; strip it for file names
            if clean_word[0] == 'X':
                clean_word = clean_word[1:]
            fn = 'graphs/words/{0}_{1}.dot'.format(clean_word, i)
            with open(fn, 'w') as f:
                f.write(graph.to_dot().encode('utf-8'))
def to_dot(self):
    """Serialize self.G to graphviz dot format.

    Node labels drop the trailing '_'-separated id field; nodes whose
    'expanded' attribute is present and false are drawn filled.  Lines
    are sorted to make the output deterministic.
    """
    # lines.append('\tordering=out;')
    out = [u'digraph finite_state_machine {', '\tdpi=100;']
    node_strs = []
    for node, n_data in self.G.nodes(data=True):
        d_node = Machine.d_clean(node)
        printname = Machine.d_clean('_'.join(d_node.split('_')[:-1]))
        if 'expanded' in n_data and not n_data['expanded']:
            template = u'\t{0} [shape = circle, label = "{1}", style="filled"];'  # nopep8
        else:
            template = u'\t{0} [shape = circle, label = "{1}"];'
        node_strs.append(
            template.format(d_node, printname).replace('-', '_'))
    out += sorted(node_strs)
    edge_strs = []
    for src, tgt, edata in self.G.edges(data=True):
        if 'color' in edata:
            # keep the original double d_clean application
            edge_strs.append(u'\t{0} -> {1} [ label = "{2}" ];'.format(
                Machine.d_clean(Machine.d_clean(src)),
                Machine.d_clean(Machine.d_clean(tgt)),
                edata['color']))
    out += sorted(edge_strs)
    out.append('}')
    return u'\n'.join(out)
def to_dot(self):
    """Build the graphviz dot representation of self.G.

    Non-expanded nodes are rendered with a filled style; only edges
    carrying a 'color' attribute are emitted.  Node and edge lines
    are sorted so the result is deterministic.
    """
    header = [u'digraph finite_state_machine {', '\tdpi=100;']
    nodes = []
    for vertex, attrs in self.G.nodes(data=True):
        vertex_id = Machine.d_clean(vertex)
        label = Machine.d_clean('_'.join(vertex_id.split('_')[:-1]))
        filled = 'expanded' in attrs and not attrs['expanded']
        if filled:
            line = u'\t{0} [shape = circle, label = "{1}", style="filled"];'.format(  # nopep8
                vertex_id, label)
        else:
            line = u'\t{0} [shape = circle, label = "{1}"];'.format(
                vertex_id, label)
        nodes.append(line.replace('-', '_'))
    edges = []
    for head, tail, attrs in self.G.edges(data=True):
        if 'color' in attrs:
            # the cleaned ids are cleaned once more, as in the original
            edges.append(u'\t{0} -> {1} [ label = "{2}" ];'.format(
                Machine.d_clean(Machine.d_clean(head)),
                Machine.d_clean(Machine.d_clean(tail)),
                attrs['color']))
    return u'\n'.join(header + sorted(nodes) + sorted(edges) + ['}'])
def get_machine(self, printname, second=False):
    """Return the active or static machine for @p printname.

    Forms of "have" are mapped to the HAS concept.  When no static
    machine exists yet, one is created via add_static and the lookup
    is retried once; @p second guards against infinite recursion.
    """
    if printname == 'have':
        logging.debug('have is changed to HAS')
        return self.get_machine("HAS")
    if printname in self.active:
        return self.active[printname].keys()[0]
    candidates = self.get_static_machine(printname)
    if candidates:
        return candidates[0]
    if second:
        # the retry after add_static also failed: give up loudly
        raise Exception(
            "no machine with printname {0}".format(printname) +
            "even after calling add_static for {0}".format(
                Machine(printname, ConceptControl())))
    self.add_static(Machine(printname, ConceptControl()))
    return self.get_machine(printname, second=True)
def to_dot(self):
    """Serialize self.G to dot format (old networkx iterator API).

    Labels are the first '_'-separated field of the cleaned node id;
    one edge line is emitted per parallel edge.  Output is sorted for
    determinism.
    """
    out = [u'digraph finite_state_machine {', '\tdpi=100;']
    node_strs = []
    for vertex in self.G.nodes_iter():
        vertex_id = Machine.d_clean(vertex)
        label = Machine.d_clean(vertex_id.split('_')[0])
        node_strs.append(
            u'\t{0} [shape = circle, label = "{1}"];'.format(
                vertex_id, label).replace('-', '_'))
    out += sorted(node_strs)
    edge_strs = []
    for source, adjacency in self.G.adjacency_iter():
        source_id = Machine.d_clean(source)
        for target, parallel_edges in adjacency.iteritems():
            target_id = Machine.d_clean(target)
            for _, attributes in parallel_edges.iteritems():
                edge_strs.append(
                    u'\t{0} -> {1} [ label = "{2}" ];'.format(
                        source_id.replace('-', '_'),
                        target_id.replace('-', '_'),
                        attributes['color']))
    out += sorted(edge_strs)
    out.append('}')
    return u'\n'.join(out)
def get_dep_definition(self, word, deps):
    """Build the definition machine of @p word from its dependency list.

    Requires exactly one 'root' dependency; otherwise the word is
    skipped with a warning and None is returned.  The root machine is
    appended to partition 0 of the word's machine.
    """
    root_deps = [d for d in deps if d[0] == 'root']
    if len(root_deps) != 1:
        logging.warning(
            u'no unique root dependency, skipping word "{0}"'.format(word))
        return None
    root_word, root_id = root_deps[0][2]
    lemma = self.lemmatizer.lemmatize(root_word).replace('/', '_PER_')
    if not lemma:
        # fall back to the surface form when lemmatization yields nothing
        lemma = root_word
    word2machine = self.get_machines_from_parsed_deps(deps)
    root_machine = word2machine[lemma]
    word_machine = word2machine.get(word, Machine(word, ConceptControl()))
    word_machine.append(root_machine, 0)
    return word_machine
def __init__(self, name, lexicon, supp_dict, max_depth=3):
    """Build a verb construction for @p name.

    Discovers the verb's argument matchers from its canonical static
    machine, generates the corresponding control, and dumps the control
    to control.dot for debugging.

    @param name printname of the verb
    @param lexicon the lexicon whose static machines are searched
    @param supp_dict supplementary dictionary used during matching
    @param max_depth maximum depth of the argument search

    Fix: control.dot is now written via ``with`` so the file handle is
    closed deterministically (it was leaked before).
    """
    self.name = name
    self.lexicon = lexicon
    self.supp_dict = supp_dict
    self.max_depth = max_depth
    self.matchers = {}
    self.working_area = [Machine(None, KRPosControl('stem/VERB'))]
    # indexing 0th element in static because that is the canonical machine
    self.discover_arguments(lexicon.static[name][0])
    control = self.generate_control()
    self.case_pattern = re.compile("N(OUN|P)[^C]*CAS<([^>]*)>")
    Construction.__init__(self, name, control)
    self.activated = False
    logging.info('VerbConstruction {0} created. Matchers: {1}'.format(
        self.name, self.matchers))
    logging.info('Control: {0}'.format(self.control))
    with open('control.dot', 'w') as f:
        f.write(self.control.to_dot())
def dep_to_dot(deps, fn):
    """Write the dependency graph @p deps to file @p fn in dot format.

    Handles both the new (dict-based) and the old (tuple-based)
    dependency structures; dependencies whose type is in EXCLUDE are
    skipped.

    Fix: the structure dispatch used a bare ``except:`` which would
    also swallow KeyboardInterrupt and mask genuine errors; it now
    uses the explicit ``isinstance`` check already used by the other
    dep_to_dot variant in this codebase.
    """
    if isinstance(deps[0], dict):  # new dep structure
        edges = [(d['dep']['lemma'], d['type'], d['gov']['lemma'])
                 for d in deps if d['type'] not in EXCLUDE]
    else:  # old dep structure
        edges = [(d[1][0], d[0], d[2][0])
                 for d in deps if d[0] not in EXCLUDE]
    words = set([e[0] for e in edges] + [e[2] for e in edges])
    lines = []
    for word in words:
        lines.append(u'\t{0} [shape=rectangle, label="{0}"];'.format(
            Machine.d_clean(word)))
    for edge in edges:
        dep, dtype, gov = map(Machine.d_clean, edge)
        lines.append(u'\t{0} -> {1} [label="{2}"];'.format(dep, gov, dtype))
    with open(fn, 'w') as f:
        f.write(HEADER.encode("utf-8"))
        f.write(u"\n".join(lines).encode("utf-8"))
        f.write("}\n")
def dep_to_dot(deps, fn):
    """Dump the dependency graph @p deps into @p fn as a dot file.

    Supports both dependency formats (dicts with 'dep'/'type'/'gov'
    keys and the old (type, (dep, ...), (gov, ...)) tuples); types in
    EXCLUDE are filtered out.

    Fix: replaced the bare ``except:`` (which hid real errors and even
    caught KeyboardInterrupt) with the explicit ``isinstance`` dispatch
    used by the sibling dep_to_dot implementation.
    """
    if isinstance(deps[0], dict):  # new dep structure
        edges = [(d['dep']['lemma'], d['type'], d['gov']['lemma'])
                 for d in deps if d['type'] not in EXCLUDE]
    else:  # old dep structure
        edges = [(d[1][0], d[0], d[2][0])
                 for d in deps if d[0] not in EXCLUDE]
    words = set([e[0] for e in edges] + [e[2] for e in edges])
    lines = []
    for word in words:
        lines.append(u'\t{0} [shape=rectangle, label="{0}"];'.format(
            Machine.d_clean(word)))
    for edge in edges:
        dep, dtype, gov = map(Machine.d_clean, edge)
        lines.append(u'\t{0} -> {1} [label="{2}"];'.format(dep, gov, dtype))
    with open(fn, 'w') as f:
        f.write(HEADER.encode("utf-8"))
        f.write(u"\n".join(lines).encode("utf-8"))
        f.write("}\n")
def dep_to_dot(deps):
    """Return the dot-language string for the dependency graph @p deps.

    Both the new (dict-based) and the old (tuple-based) dependency
    structures are accepted; types listed in EXCLUDE are skipped.
    """
    if isinstance(deps[0], dict):
        # new dep structure: dicts with 'dep' / 'type' / 'gov' keys
        edges = [(d['dep']['lemma'], d['type'], d['gov']['lemma'])
                 for d in deps if d['type'] not in EXCLUDE]
    else:
        # old dep structure: (type, (dep, ...), (gov, ...)) tuples
        edges = [(d[1][0], d[0], d[2][0])
                 for d in deps if d[0] not in EXCLUDE]
    words = set([e[0] for e in edges] + [e[2] for e in edges])
    lines = [u'\t{0} [shape=rectangle, label="{0}"];'.format(
        Machine.d_clean(word)) for word in words]
    for edge in edges:
        dep, dtype, gov = map(Machine.d_clean, edge)
        lines.append(u'\t{0} -> {1} [label="{2}"];'.format(dep, gov, dtype))
    dot_str = HEADER.encode("utf-8")
    dot_str += u"\n".join(lines).encode("utf-8")
    dot_str += "}\n"
    return dot_str
def dep_to_dot(deps):
    """Build and return the dot representation of dependencies @p deps.

    Dispatches on the element type: dicts mean the new structure,
    anything else the old tuple structure.  EXCLUDE-typed dependencies
    are dropped.
    """
    if isinstance(deps[0], dict):
        # new dep structure
        edges = [(entry['dep']['lemma'], entry['type'],
                  entry['gov']['lemma'])
                 for entry in deps if entry['type'] not in EXCLUDE]
    else:
        # old dep structure
        edges = [(entry[1][0], entry[0], entry[2][0])
                 for entry in deps if entry[0] not in EXCLUDE]
    vocabulary = set([e[0] for e in edges] + [e[2] for e in edges])
    body = []
    for word in vocabulary:
        body.append(u'\t{0} [shape=rectangle, label="{0}"];'.format(
            Machine.d_clean(word)))
    for edge in edges:
        dep, dtype, gov = map(Machine.d_clean, edge)
        body.append(u'\t{0} -> {1} [label="{2}"];'.format(dep, gov, dtype))
    dot_str = HEADER.encode("utf-8")
    dot_str += u"\n".join(body).encode("utf-8")
    dot_str += "}\n"
    return dot_str
def extract_definition_graph(self, deep_cases=False):
    """
    Extracts the definition graph from the static graph. The former is
    a "flattened" version of the latter: all canonical words in a
    definition are connected to the definiendum, as well as the
    canonical version of non-canonical terms. The structure of the
    definition is not preserved.

    @param deep_cases if @c False (the default), deep cases in the
                      definitions do not appear on the output graph.
    """
    canonicals = set(machines[0] for machines in self.static.values())
    # one definition-graph node per static word
    def_graph = {}
    for name in self.static.keys():
        def_graph[name] = [Machine(name)]
    for name, static_machines in self.static.iteritems():
        static_machine = static_machines[0]
        if static_machine.fancy():
            continue
        self.__build_definition_graph(
            def_graph[name][0], static_machine, def_graph, set([]),
            canonicals, deep_cases)
    return def_graph
def unify_recursively(self, static_machine, zeros_only, first=False,
                      stop=None):
    """Returns the active machine that corresponds to @p static_machine.
    It recursively unifies all machines in all partitions of
    @p static_machine with machines in the active set.
    @p static_machine may be either a machine or a string.
    @param stop the set of machines already unified."""
    if stop is None:
        stop = set()
    if unicode(static_machine) == u'IS_A':
        return None
    # If we have already unified this machine: just return
    if (not isinstance(static_machine, str) and
            not isinstance(static_machine, unicode)):
        static_printname = static_machine.printname()
    else:
        static_printname = static_machine
    if static_printname in stop:
        return self.active[static_printname].keys()[0]
    # If static_machine is a string, we don't have much to do
    if isinstance(static_machine, str):
        if static_machine in self.active:
            # FIXME: [0] is a hack, fix it
            return self.active[static_machine].keys()[0]
        else:
            if static_machine.startswith('#'):
                # '#'-prefixed names denote AVM constructions: wake the
                # construction instead of creating an active machine
                self.wake_avm_construction(static_machine)
                return None
            active_machine = Machine(static_machine, ConceptControl())
            self.__add_active_machine(active_machine)
            return active_machine
    # If it's a machine, we create the corresponding active one
    elif isinstance(static_machine, Machine):
        static_name = static_machine.printname()
        if static_name in self.active:
            active_machine = self.active[static_name].keys()[0]
        else:
            if static_name.startswith('#'):
                self.wake_avm_construction(static_name)
                return None
            active_machine = Machine(static_name)
            active_control = copy.copy(static_machine.control)
            # deepcopy causes infinite recursion, I hope shallow copy
            # works, since the active machine will update the control's
            # machine attribute (and we don't know of anything else)
            active_machine.set_control(active_control)
            self.__add_active_machine(active_machine)
        stop.add(static_name)
        # Now we have to walk through the tree recursively
        for i, part in enumerate(static_machine.partitions):
            for ss_machine in part:
                as_machine = self.unify_recursively(ss_machine, zeros_only,
                                                    first=False, stop=stop)
                if as_machine is not None:
                    # attach the unified child to the same partition index
                    active_machine.append(as_machine, i)
        return active_machine
    else:
        raise TypeError('static_machine must be a Machine or a str')
def __add_static_recursive(self, curr_from, replacement=None):
    # Recursively adds @p curr_from and its subtree to the static graph,
    # replacing machines by their canonical counterparts where one exists.
    # @p replacement maps each visited machine to the machine that stands
    # in for it in the static graph; an empty map marks the definition
    # word itself.  Returns the replacement of @p curr_from.
    if replacement is None:
        replacement = {}
    if curr_from not in replacement:
        # Deep cases are not canonized
        if curr_from.deep_case():
            replacement[curr_from] = curr_from
        else:
            """
            try:
                if curr_from.printname().isupper():
                    curr_from.printname_ = curr_from.printname().lower()
            except AttributeError, e:
                logging.info('curr_from: {0}, type: {1}'.format(
                    curr_from, type(curr_from)))
                raise Exception(e)
            """
            # Does this machine appear in the static tree?
            from_already_seen = self.__get_disambig_incomplete(
                curr_from.printname())
            # If not: simply adding the new machine/definition...
            if len(from_already_seen) == 0:
                # This is the definition word, or no children: accept as
                # canonical / placeholder
                if len(curr_from.children()) == 0 or len(replacement) == 0:
                    from_already_seen = [curr_from]
                # Otherwise add a placeholder + itself to static
                else:
                    from_already_seen = [
                        Machine(curr_from.printname()), curr_from]
                self.static[curr_from.printname()] = from_already_seen
                self.__add_to_disambig(curr_from.printname())
                replacement[curr_from] = curr_from
            else:
                # Definitions: the word is the canonical one, regardless of
                # the number of children
                if len(replacement) == 0:
                    canonical = from_already_seen[0]
                    canonical.printname_ = curr_from.printname()
                    canonical.control = curr_from.control
                    replacement[curr_from] = canonical
                # Handling non-definition words
                else:
                    canonical = from_already_seen[0]
                    # No children: replace with the canonical
                    if len(curr_from.children()) == 0:
                        replacement[curr_from] = canonical
                    # Otherwise: add the new machine to static, and keep it
                    else:
                        replacement[curr_from] = curr_from
                        from_already_seen.append(curr_from)
    # Copying the children: detach each child from curr_from (to drop the
    # parent links) and re-attach its recursive replacement to the
    # replacement machine, preserving the partition index
    curr_to = replacement[curr_from]
    from_partitions = [[m for m in p] for p in curr_from.partitions]
    for part_i, part in enumerate(from_partitions):
        for child in part:
            curr_from.remove(child, part_i)
            curr_to.append(
                self.__add_static_recursive(child, replacement), part_i)
    return replacement[curr_from]
def get_new_machine(self, printname):
    """Create and return a fresh concept machine for @p printname.

    The machine is not registered in any lexicon; callers own it.
    """
    return Machine(printname, ConceptControl())
def unify_recursively(self, static_machine, zeros_only, first=False,
                      stop=None):
    """Returns the active machine that corresponds to @p static_machine.
    It recursively unifies all machines in all partitions of
    @p static_machine with machines in the active set.
    @p static_machine may be either a machine or a string.
    @param stop the set of machines already unified."""
    if stop is None:
        stop = set()
    if unicode(static_machine) == u'IS_A':
        return None
    # If we have already unified this machine: just return
    if (not isinstance(static_machine, str) and
            not isinstance(static_machine, unicode)):
        static_printname = static_machine.printname()
    else:
        static_printname = static_machine
    if static_printname in stop:
        return self.active[static_printname].keys()[0]
    # If static_machine is a string, we don't have much to do
    if isinstance(static_machine, str):
        if static_machine in self.active:
            # FIXME: [0] is a hack, fix it
            return self.active[static_machine].keys()[0]
        else:
            if static_machine.startswith('#'):
                # '#' marks an AVM construction: wake it instead of
                # activating a machine
                self.wake_avm_construction(static_machine)
                return None
            active_machine = Machine(static_machine, ConceptControl())
            self.__add_active_machine(active_machine)
            return active_machine
    # If it's a machine, we create the corresponding active one
    elif isinstance(static_machine, Machine):
        static_name = static_machine.printname()
        if static_name in self.active:
            active_machine = self.active[static_name].keys()[0]
        else:
            if static_name.startswith('#'):
                self.wake_avm_construction(static_name)
                return None
            active_machine = Machine(static_name)
            active_control = copy.copy(static_machine.control)
            # deepcopy causes infinite recursion, I hope shallow copy
            # works, since the active machine will update the control's
            # machine attribute (and we don't know of anything else)
            active_machine.set_control(active_control)
            self.__add_active_machine(active_machine)
        stop.add(static_name)
        # Now we have to walk through the tree recursively
        for i, part in enumerate(static_machine.partitions):
            for ss_machine in part:
                as_machine = self.unify_recursively(
                    ss_machine, zeros_only, first=False, stop=stop)
                if as_machine is not None:
                    # keep the partition index of the static machine
                    active_machine.append(as_machine, i)
        return active_machine
    else:
        raise TypeError('static_machine must be a Machine or a str')
# logging.warning('duplicate pn: {0}, machines: {1}, {2}'.format( # pn, d[pn], "{0}:{1}".format(m, m.partitions))) d[m.printname()].add(m) logging.debug('\n'+m.to_debug_str()) except pyparsing.ParseException, pe: print l logging.error("Error: "+str(pe)) return d def read_plur(_file): plur_dict = {} for line in _file: plur, sg = line.split() plur_dict[plur] = sg return plur_dict if __name__ == "__main__": logging.basicConfig(level=logging.WARNING, format="%(asctime)s : %(module)s (%(lineno)s) " + "- %(levelname)s - %(message)s") plur_dict = read_plur(open('/home/recski/projects/4lang/4lang.plural')) dp = DefinitionParser(plur_dict) pstr = sys.argv[-1] if sys.argv[1] == "-d": print Machine.to_debug_str(dp.parse_into_machines(pstr), max_depth=99) elif sys.argv[1] == "-f": lexicon = read(file(sys.argv[2]), '../../res/4lang/4lang.plural', three_parts=True) else: print dp.parse(pstr)
# logging.warning('duplicate pn: {0}, machines: {1}, {2}'.format( # pn, d[pn], "{0}:{1}".format(m, m.partitions))) d[m.printname()].add(m) logging.debug('\n'+m.to_debug_str()) except pyparsing.ParseException, pe: print l logging.error("Error: "+str(pe)) return d def read_plur(_file): plur_dict = {} for line in _file: plur, sg = line.split() plur_dict[plur] = sg return plur_dict if __name__ == "__main__": plural_f = '../../4lang/4lang.plural' logging.basicConfig(level=logging.WARNING, format="%(asctime)s : %(module)s (%(lineno)s) " + "- %(levelname)s - %(message)s") plur_dict = read_plur(open(plural_f)) dp = DefinitionParser(plur_dict) pstr = sys.argv[-1] if sys.argv[1] == "-d": print Machine.to_debug_str(dp.parse_into_machines(pstr), max_depth=99) elif sys.argv[1] == "-f": lexicon = read(file(sys.argv[2]), plural_f, three_parts=True) else: print dp.parse(pstr)