def get_wordarr(st):
    '''Collect the text of the words found under the node ``st``.

    For a terminal node (as decided by ``is_terminalc``) the node's ``text``
    is returned as is.  For any other node the children in
    ``left + blocks + right`` are visited in order and their results are
    merged into one list.

    Terminal children whose ``meaning`` is ``None`` are skipped, and so are
    children whose own result is ``None``.

    Returns ``st.text`` for a terminal, otherwise a new list.
    '''
    if is_terminalc(st.type):
        return st.text

    words = []
    for child in st.left + st.blocks + st.right:
        if is_terminalc(child.type) and child.meaning is None:
            continue
        # Recurse exactly once per child: calling twice (once to test for
        # None, once to merge) doubles the work at every level of the tree.
        child_words = get_wordarr(child)
        if child_words is not None:
            # presumably ``text`` is a list of words, since it is merged
            # with ``+=`` -- TODO confirm against the node class
            words += child_words
    return words
def pre_morphen(lang, st, ind=0):
    '''Init the main morpho-groups before doing morphological generation.

    Generator over variants of ``st``.  ``st.parent`` is reset to ``None``
    on entry.

    Terminal node:
      * without ``fixed`` attributes, or an empty word: ``st`` itself is
        yielded unchanged;
      * otherwise the entries of ``lang.words`` accepted by ``eqx`` for every
        concept except ``concept["tags"]`` are passed through ``score_tags``.
        Entries whose fixed attributes all equal those of an entry already
        kept are dropped, and so are entries having ``NONE_VALUE`` in a fixed
        attribute.  One deep copy of ``st`` is yielded per remaining entry,
        with its fixed attributes set from that entry.  Nothing is yielded
        when no entry remains.

    Non-terminal node: the child at position ``ind`` of
    ``left + blocks + right`` is expanded recursively; each result replaces
    that child in a deep copy of ``st``.  Copies whose new child reports a
    falsy ``refresh()`` are discarded, the rest go through
    ``the_bests(lang, eupony_score, ...)`` and are either yielded (last
    child) or expanded further at ``ind + 1``.

    ``ind`` is the index of the child to expand; callers normally leave it
    at its default.
    '''
    st.parent = None

    if is_terminalc(st.type):
        if not st.fixed or is_empty_word(st):
            yield st
            return

        variants = [x for x in lang.words
                    if all(y == concept["tags"] or eqx(y, x, st)
                           for y in concept)]  # musteq
        variants = score_tags(variants, st)
        if variants is None:
            # ``morphen`` treats a None result from score_tags like an empty
            # one; do the same here instead of failing on len()/iteration.
            return
        if PRINT_TO_CONSOLE and len(variants) > 1:
            print("CAUTION: Ambiguity in 'pre_morphen' at %s" % st.descr())

        distinct = []
        for variant in variants:
            # A variant is redundant when some kept variant agrees with it
            # on every fixed attribute.
            duplicate = any(
                not any(variant.attr(f) != kept.attr(f) for f in st.fixed)
                for kept in distinct
            )
            if duplicate:
                continue
            if any(variant.attr(f) == NONE_VALUE for f in st.fixed):
                # NOTE(review): unconditional debug output kept verbatim;
                # it looks like a leftover -- confirm before removing.
                print("xxxxxxxxxxxpre_morphenpre_morphenpre_morphenpre_morphen")
                continue
            distinct.append(variant)

        for variant in distinct:
            c = deepcopy(st)
            for f in c.fixed:
                c.attr(f, variant.attr(f))
            yield c
        return

    res = []
    for x in pre_morphen(lang, (st.left + st.blocks + st.right)[ind]):
        c = deepcopy(st)
        left_len = len(c.left)
        left_blocks_len = len(c.left + c.blocks)
        # Put the expanded child back into whichever of the three child
        # lists the flat index ``ind`` falls in.
        if ind < left_len:
            c.left[ind] = x
        elif ind < left_blocks_len:
            c.blocks[ind - left_len] = x
        else:
            c.right[ind - left_blocks_len] = x
        x.parent = c
        if not x.refresh():
            continue
        res.append(c)

    res = the_bests(lang, eupony_score, res)
    for c in res:
        if ind + 1 == len(c.left + c.blocks + c.right):
            yield c
        else:
            yield from pre_morphen(lang, c, ind + 1)
def get_tranarr(st):
    '''Collect the transcriptions found under the node ``st``.

    For a terminal node (as decided by ``is_terminalc``) the node's
    ``transcription`` is returned as is.  For any other node the children in
    ``left + cblocks + right`` are visited in order and every result that is
    not ``None`` is merged into one list.

    Raises ``Exception`` when ``st.type`` is ``None``; the message names
    ``st.meaning``.
    '''
    if st.type is None:
        raise Exception('%s has None type' % st.meaning)
    if is_terminalc(st.type):
        return st.transcription

    # NOTE(review): the other traversals in this file read ``st.blocks``;
    # confirm that ``cblocks`` is an intentional, distinct attribute.
    transcriptions = []
    for child in st.left + st.cblocks + st.right:
        part = get_tranarr(child)
        if part is not None:
            transcriptions += part
    return transcriptions
def lang_to_case_frame(unit):
    '''Translates the source language AST to Interlingua.

    Builds the case frame ``cf`` of ``unit`` and returns it wrapped as
    ``{unit.type: cf}``.

    * Terminal unit: every attribute listed in
      ``transferred_attributes[unit.type]`` is copied; a ``None`` value is
      replaced by ``default[c]`` when a default exists.
    * Unit with more than one block and a relation (a conjunction): the
      relation's ``conj_str``, ``conj_type`` and ``function_number`` are
      stored, followed by the list of the blocks' frames under the type of
      the first block.
    * Any other unit: the frames of the ``left``/``right`` children whose
      type is not in ``modificators`` are merged with ``update_dict``, the
      unit's own transferred attributes are copied (falling back to
      ``default``), and the blocks are added -- merged directly for a single
      block, or collected into per-type lists otherwise.

    While the single block of a unit with a quantity is translated,
    ``concept["real-number"]`` is temporarily taken out of the shared noun
    attribute list so that the block does not transfer it.

    Raises ``NotImplementedError`` when an attribute is ``None``, has no
    default and is not listed in ``exclusions``.
    '''
    cf = {}
    if is_terminalc(unit.type):
        for c in transferred_attributes[unit.type]:
            value = unit.attr(c)
            if value is None and c in default:
                value = default[c]
            cf[c] = value
    elif len(unit.blocks) > 1 and unit.relation is not None:
        t = unit.blocks[0].type
        relation = unit.relation
        cf[concept["conj-str"]] = relation.conj_str
        cf[concept["conj-type"]] = relation.conj_type
        cf[concept["conj-function"]] = relation.function_number
        cf[t] = [lang_to_case_frame(y)[y.type] for y in unit.blocks]
    else:
        for e in unit.left + unit.right:
            if e.type not in modificators:  ##and (e.type != TERMINAL_DETERMINER or not e.attr(concept["difinity"])): #OR JUST TERMINAL_ARTICLE
                update_dict(cf, lang_to_case_frame(e))

        for c in transferred_attributes[unit.type]:
            value = unit.attr(c)
            if value is not None:
                cf[c] = value
            elif c in default:
                cf[c] = default[c]
            elif c not in exclusions:
                print(unit.type, c)
                raise NotImplementedError(
                    "attribute %r of unit type %r has no value and no default"
                    % (c, unit.type)
                )

        if len(unit.blocks) == 1:
            # Decide once, before the child frame is merged, whether the
            # shared table must be patched; remember where the entry was so
            # that it can be put back exactly there.
            noun_attrs = None
            position = None
            if cf.get(concept["quantity"], None) not in (None, NONE_VALUE):
                candidates = transferred_attributes[const.type["noun"]]
                real_number = concept["real-number"]
                # An enclosing call may already have removed the entry.
                if real_number in candidates:
                    noun_attrs = candidates
                    position = noun_attrs.index(real_number)
                    del noun_attrs[position]
            try:
                cf.update(lang_to_case_frame(unit.blocks[0]))
            finally:
                # Restore the shared table even if the recursion raised, and
                # only if this call was the one that removed the entry.
                if noun_attrs is not None:
                    noun_attrs.insert(position, concept["real-number"])
        else:
            for b in unit.blocks:
                child_frame = lang_to_case_frame(b)[b.type]
                if b.type in cf:
                    cf[b.type] += [child_frame]
                else:
                    cf[b.type] = [child_frame]
    # #mosaic translation
    # if not curr_type in cf.keys():
    #     for i in min_str.get(curr_type, []):
    #         if not i in cf.keys():
    #             print(str(i), str(curr_type), cf)
    #             raise NotImplementedError()
    return {unit.type: cf}
def morphen(lang, st, ind = 0):
    '''Morphological generation.

    Generator over realised variants of ``st``.  ``st.parent`` is reset to
    ``None`` on entry.

    Terminal node: an empty word is yielded unchanged.  Otherwise the
    entries of ``lang.words`` accepted by ``eqx`` for every concept except
    ``concept["tags"]`` are passed through ``score_tags``; for each resulting
    entry a deep copy of ``st`` is yielded carrying the entry's ``text``,
    ``transcription`` and the value of every concept attribute.

    Non-terminal node: the child at position ``ind`` of
    ``left + blocks + right`` is generated recursively; each result replaces
    that child in a deep copy of ``st``.  Copies whose new child reports a
    falsy ``refresh()`` are dropped, the rest go through
    ``the_bests(lang, eupony_score, ...)`` and are either yielded (last
    child) or generated further at ``ind + 1``.

    Raises ``Exception`` when no dictionary entry is found for a terminal,
    or when a copied terminal has no type.
    '''
    st.parent = None

    if is_terminalc(st.type):
        if is_empty_word(st):
            yield st
            return

        matching = []
        for word in lang.words:
            if all(p == concept["tags"] or eqx(p, word, st) for p in concept):  # musteq
                matching.append(word)
        candidates = score_tags(matching, st)

        if candidates is None or candidates == []:
            raise Exception("Not found in the '%s' dictionary %s" % (lang.name, st.descr()))

        if PRINT_TO_CONSOLE and len(candidates) > 1:
            alternatives = str([word.descr() for word in candidates])
            print("CAUTION: Ambiguity in 'morphen' at %s alternatives %d (%s)" % (st.descr(), len(candidates), alternatives))

        for word in candidates:
            realised = deepcopy(st)
            if realised.type is None:
                raise Exception('%s hasn\'t type' % str(st))
            realised.text = word.text
            realised.transcription = word.transcription
            for p in concept:
                realised.attr(p, word.attr(p))
            yield realised
        return

    survivors = []
    target = (st.left + st.blocks + st.right)[ind]
    for generated in morphen(lang, target):
        clone = deepcopy(st)
        left_len = len(clone.left)
        left_blocks_len = len(clone.left + clone.blocks)
        # Map the flat child index back onto the three child lists.
        if ind < left_len:
            clone.left[ind] = generated
        elif ind < left_blocks_len:
            clone.blocks[ind - left_len] = generated
        else:
            clone.right[ind - left_blocks_len] = generated
        generated.parent = clone
        if generated.refresh():
            survivors.append(clone)

    for best in the_bests(lang, eupony_score, survivors):
        child_count = len(best.left + best.blocks + best.right)
        if ind + 1 == child_count:
            yield best
        else:
            for deeper in morphen(lang, best, ind + 1):
                yield deeper
def meaning_shift(cf, lang):
    '''Interlingua approximation.

    Walks the case frame ``cf`` in place and returns it.  Anything that is
    not exactly a ``dict`` is returned untouched.

    For every key of ``cf``:
      * non-terminal key: the value is processed recursively;
      * terminal key with a list value: every item is processed recursively;
      * terminal key with any other value: when its lemma is not ``None``,
        is not among ``lang.meanings`` and has an entry in ``cf_diff``, the
        entry under the key is deleted and replaced by the content of that
        (recursively processed) definition, merged directly into ``cf``;
        the original ``order-number`` is then stored on ``cf``.
    '''
    if type(cf) is not dict:
        return cf

    # Work on a snapshot of the keys: entries are deleted and added below.
    for key in list(cf.keys()):
        if not is_terminalc(key):
            # (a branch for tuple-valued entries is disabled in the original)
            cf[key] = meaning_shift(cf[key], lang)
            continue

        entry = cf[key]
        if type(entry) is list:
            cf[key] = [meaning_shift(item, lang) for item in entry]
            continue

        lemma = entry[concept["lemma"]]
        if lemma is None or lemma in lang.meanings.keys() or lemma not in cf_diff:
            continue

        # Replace the unknown lemma with its definition.
        # NOTE(review): the definition stored in cf_diff is processed in
        # place and its values are shared, not copied -- confirm intended.
        definition = meaning_shift(cf_diff[lemma], lang)
        order_number = entry[concept["order-number"]]
        del cf[key]
        # The definition is wrapped as {type: {...}}; unwrap its first value.
        content = definition[list(definition.keys())[0]]
        for content_key, content_value in content.items():
            cf[content_key] = content_value
        cf[concept["order-number"]] = order_number
    return cf