def expand(word, flags, flush_stdout):
    """Expand a word and its flag string into processed form lines.

    ``flags`` is divided at its first space into a main flag and the
    remaining "extra" text.  When the main flag begins with "/", the lines
    come from ``expand_suffixes`` followed by ``adjust_affix_tags``;
    otherwise a single ``"<word> <word> <flags>"`` line is used and later
    passed through ``util.expand_nv``.  The lines then have their "//" and
    "/" alternatives expanded, the temporary ``v_zn1``/``v_zn2`` markers
    resolved (only when "/adj" occurs in ``flags``), the modifiers applied
    via ``modify``, and ":compb" appended to every line when ``flags``
    contains a backslash.

    Args:
        word: the word being expanded.
        flags: the flag string that follows the word.
        flush_stdout: not read by this function.

    Returns:
        The result of ``post_expand`` for the processed lines.

    Raises:
        IndexError: if the main flag is empty (``flags`` is empty or starts
            with a space).
    """
    main_flag, _, extra = flags.partition(" ")

    modifiers = get_modifiers(extra, flags, word)

    # Evaluated after get_modifiers() so an empty main flag fails at the
    # same point as before.
    has_affix_flag = main_flag[0] == "/"

    if has_affix_flag:
        sfx_lines = expand_suffixes(word, main_flag[1:], modifiers, extra)
        sfx_lines = adjust_affix_tags(sfx_lines, main_flag, flags, modifiers)
    else:
        sfx_lines = [" ".join((word, word, flags))]

    # Mark "v_zna" as "v_zn1" on lines carrying the combined
    # "adj:m:v_rod/v_zna" tag; the marker is resolved further down.
    marked = []
    for entry in sfx_lines:
        if "adj:m:v_rod/v_zna" in entry:
            entry = entry.replace("v_zna", "v_zn1")
        marked.append(entry)

    # TODO: change this to some single-char splitter?
    sfx_lines = affix.expand_alts(marked, "//")
    sfx_lines = affix.expand_alts(sfx_lines, "/")

    if "/adj" in flags:
        # With "^noun" or "<" in the flags both markers collapse to a bare
        # "v_zna"; otherwise each marker gets its own extra tag.
        bare_zna = "^noun" in flags or "<" in flags
        zn1_tag = "v_zna" if bare_zna else "v_zna:ranim"
        zn2_tag = "v_zna" if bare_zna else "v_zna:rinanim"

        resolved = []
        for entry in sfx_lines:
            if "v_zn1" in entry:
                entry = entry.replace("v_zn1", zn1_tag)
            elif "v_zn2" in entry:
                entry = entry.replace("v_zn2", zn2_tag)
            resolved.append(entry)
        sfx_lines = resolved

    if not has_affix_flag:
        sfx_lines = util.expand_nv(sfx_lines)

    sfx_lines = modify(sfx_lines, modifiers)

    if "\\" in flags:
        # Updated in place, element by element.
        for pos, entry in enumerate(sfx_lines):
            sfx_lines[pos] = entry + ":compb"

    return post_expand(sfx_lines, flags)
def preprocess(line):
    """Split a raw line into variants and run each through ``preprocess2``.

    A line containing " /" more than once is split on single spaces and
    turned into two variants: one that omits the third token and one that
    omits the second token.  Any other line is expanded with
    ``affix.expand_alts`` using "|" as the separator.

    Args:
        line: the raw input line.

    Returns:
        A flat list with the items produced by ``preprocess2`` for every
        variant, in order.
    """
    if line.count(" /") > 1:
        tokens = line.split(" ")
        variants = [
            " ".join(tokens[:2] + tokens[3:]),
            " ".join(tokens[:1] + tokens[2:]),
        ]
    else:
        variants = affix.expand_alts([line], "|")

    return [item for variant in variants for item in preprocess2(variant)]
def expand_line(line, flush_stdout):
    """Expand one input line into its post-processed output lines.

    The line is first run through ``preprocess``; every resulting line is
    then handled in one of three ways:

    * it contains a backslash followed by " +": the text is split on
      backslashes, the first piece becomes the main line (with ":compb"
      appended) and the remaining pieces are kept as sub-lines;
    * it matches ``word_lemma_re``: its alternatives are expanded and the
      result is emitted directly, without inflection;
    * otherwise it is treated as ``word flags`` as is.

    For the first and third case the line is split into word and flags and
    expanded with ``expand``; sub-lines, if any, are expanded with the
    ``expand_subposition*`` helpers and emitted before the inflected lines.

    Writes the module globals ``main_word`` and ``last_adv``.

    Args:
        line: the raw input line.
        flush_stdout: forwarded unchanged to ``expand``.

    Returns:
        The result of ``post_process`` for all collected lines.

    Raises:
        Exception: ("empty liner", inflected_lines) if ``expand`` produced a
            blank line.
        ValueError: if a line to inflect has no space separating the word
            from its flags (re-raised after a message on stderr).
    """
    global main_word, main_flag, last_adv

    collected = []
    for item in preprocess(line):
        sub_lines = []

        if "\\ +" in item:
            # +cs
            item, *sub_lines = item.split("\\")
            item = item.rstrip()
            # Attach ":compb" directly when the line already has a " :" part
            # or has no " /" flag; otherwise start a new " :" part.
            joiner = "" if (" :" in item or " /" not in item) else " "
            item = item + joiner + ":compb"

        elif word_lemma_re.search(item):
            # word lemma tags
            if "/" in item:
                # TODO: change this to some single-char splitter?
                alt_lines = affix.expand_alts([item], "//")
                try:
                    alt_lines = affix.expand_alts(alt_lines, "/")
                except BaseException:
                    # Diagnostic only; the error is always re-raised.
                    print("Failed to expand", alt_lines, file=sys.stderr)
                    raise
            else:
                alt_lines = [item]

            if ":nv" in item and "v_" not in item:
                alt_lines = util.expand_nv(alt_lines)

            collected.extend(alt_lines)
            continue

        # word tags
        # word /flags [mods] [tags]
        try:
            word, flags = item.split(" ", 1)
        except BaseException:
            # Diagnostic only; the error is always re-raised.
            print("Failed to find flags in", item, file=sys.stderr)
            raise

        main_word = word

        inflected_lines = expand(word, flags, flush_stdout)

        for idx, sub_line in enumerate(sub_lines):
            if flags.startswith("adv:"):
                # Drop the "adv" prefix, keeping the colon that follows it.
                extra_flags = flags[3:].replace(":compb", "")
            elif " :" in flags or flags.startswith(":"):
                tag_match = re_search("(^| )(:[^ ]+)", flags)
                extra_flags = tag_match.group(2).replace(":compb", "")
            else:
                extra_flags = ""

            if " adv" in item:
                sub_expanded = expand_subposition_adv_main(
                    main_word, sub_line, extra_flags
                )
            else:
                sub_expanded = expand_subposition(
                    main_word, sub_line, extra_flags, idx
                )
            collected.extend(sub_expanded)

            if ".adv" in item and "/adj" in item:
                # Only the first inflected line containing " adv" is used.
                adv_line = next(
                    (form for form in inflected_lines if " adv" in form),
                    None,
                )
                if adv_line is not None:
                    last_adv = adv_line.split()[0]
                    collected.extend(
                        expand_subposition_adv(last_adv, sub_line, extra_flags)
                    )

        collected.extend(inflected_lines)

        if any(not form.strip() for form in inflected_lines):
            raise Exception("empty liner", inflected_lines)

    return post_process(collected)