def fil_x(words, ent_form, ent_number):
    """Fill the subject placeholder of a template with ``ent_form``.

    The placeholders are tried in the order ``[X]``, ``[X.Loc]``,
    ``[X.Gen]``, ``[X.Acc]``, ``[X.Dat]``, ``[X.Abl]``; only the first one
    found in ``words`` is replaced (first occurrence only).  ``[X]`` receives
    ``ent_form`` unchanged.  The case-marked placeholders receive the first
    result of ``inflect(..., language='tur')``.  If ``ent_form`` contains any
    upper-case character it is lower-cased before inflection and the inflected
    form is passed through ``fix_up`` together with the upper-case positions.

    Independently of the above, a ``[X;be]`` placeholder is replaced with
    ``add_be(ent_form, ent_number)``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag appended to the inflection request.

    Returns:
        The same ``words`` list, after substitution.
    """
    upper_positions = [pos for pos, ch in enumerate(ent_form) if ch.isupper()]
    lemma = ent_form.lower() if upper_positions else ent_form

    case_slots = (
        ('[X.Loc]', 'LOC'),
        ('[X.Gen]', 'GEN'),
        ('[X.Acc]', 'ACC'),
        ('[X.Dat]', 'DAT'),
        ('[X.Abl]', 'ABL'),
    )

    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    else:
        for slot, case in case_slots:
            if slot not in words:
                continue
            pos = words.index(slot)
            inflected = inflect(lemma, f"N;{case};{ent_number}",
                                language='tur')[0]
            if upper_positions:
                # Restore the capitalisation that was stripped before inflecting.
                inflected = fix_up(inflected, upper_positions)
            words[pos] = inflected
            break

    if '[X;be]' in words:
        words[words.index('[X;be]')] = add_be(ent_form, ent_number)

    return words
def fil_x(words, ent_form, ent_number, is_human):
    """Fill the subject placeholders of a template with ``ent_form``.

    Three passes are made over ``words`` (which is modified in place):

    1. The first placeholder found among ``[X]``, ``[X.LOC]``, ``[X.GEN]``
       (tested in that order) is replaced.  ``[X]`` always gets ``ent_form``
       unchanged.  The case-marked ones get the first result of
       ``inflect(..., language='ben')``, unless ``some_roman_chars(ent_form)``
       is truthy or ``ent_form`` is all upper-case, in which case the form is
       inserted uninflected.
    2. Every token of the shape ``[X]<suffix>`` becomes ``ent_form + <suffix>``.
    3. Every token that starts with ``[``, contains ``human`` and contains
       ``|`` is treated as ``[<tag>:<form>|<tag>:<form>]``; the first option's
       form is used when ``is_human`` is truthy, otherwise the second's.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag appended to the inflection request.
        is_human: selects which agreement option is used in pass 3.

    Returns:
        The same ``words`` list, after substitution.
    """
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    else:
        for case in ("LOC", "GEN"):
            slot = f"[X.{case}]"
            if slot not in words:
                continue
            pos = words.index(slot)
            if do_not_inflect:
                words[pos] = ent_form
            else:
                words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                     language='ben')[0]
            break

    # Placeholders that carry a fixed, hand-written suffix.
    for i, w in enumerate(words):
        if w[:3] == '[X]' and len(w) > 3:
            words[i] = ent_form + w[3:]

    # Agreement-dependent tokens (needed for subject-verb agreement).
    # startswith() instead of w[0] so that an empty token (for example one
    # produced by a double space in the template) does not raise IndexError.
    for i, w in enumerate(words):
        if w.startswith('[') and 'human' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            chosen = options[0] if is_human else options[1]
            words[i] = chosen.strip().split(':')[1]

    return words
def fil_x(words, ent_form, ent_number, is_human):
    """Fill the subject placeholder of a template with ``ent_form``.

    The placeholders ``[X]``, ``[X.ACC]``, ``[X.DAT]``, ``[X.ON+ESS]`` are
    tried in that order and only the first one present in ``words`` is
    replaced (first occurrence only).  ``[X]`` gets ``ent_form`` unchanged.
    The case-marked ones get the first result of
    ``inflect(..., language='hun')``, unless ``some_roman_chars(ent_form)``
    is truthy or ``ent_form`` is all upper-case, in which case the form is
    inserted uninflected.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag appended to the inflection request.
        is_human: accepted for signature compatibility; not read here.

    Returns:
        The same ``words`` list, after substitution.
    """
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    # (placeholder, case tag); None marks the bare, never-inflected slot.
    slots = (
        ('[X]', None),
        ('[X.ACC]', 'ACC'),
        ('[X.DAT]', 'DAT'),
        ('[X.ON+ESS]', 'ON+ESS'),
    )

    for slot, case in slots:
        if slot not in words:
            continue
        pos = words.index(slot)
        if case is None or do_not_inflect:
            words[pos] = ent_form
        else:
            words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                 language='hun')[0]
        break

    return words
def fil_y(words, ent_form, ent_number, is_human):
    """Fill the object placeholders of a template with ``ent_form``.

    First, the placeholders ``[Y]``, ``[Y.LOC]``, ``[Y.GEN]`` are tried in
    that order and only the first one present in ``words`` is replaced.
    ``[Y]`` gets ``ent_form`` unchanged.  The case-marked ones get the first
    result of ``inflect(..., language='ben')``, unless
    ``some_roman_chars(ent_form)`` is truthy or ``ent_form`` is all
    upper-case, in which case the form is inserted uninflected.

    Afterwards every token of the shape ``[Y]<suffix>`` is rewritten to
    ``ent_form + <suffix>``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag appended to the inflection request.
        is_human: accepted for signature compatibility; not read here.

    Returns:
        The same ``words`` list, after substitution.
    """
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    # (placeholder, case tag); None marks the bare, never-inflected slot.
    slots = (
        ('[Y]', None),
        ('[Y.LOC]', 'LOC'),
        ('[Y.GEN]', 'GEN'),
    )

    for slot, case in slots:
        if slot not in words:
            continue
        pos = words.index(slot)
        if case is None or do_not_inflect:
            words[pos] = ent_form
        else:
            words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                 language='ben')[0]
        break

    # Placeholders that carry a fixed, hand-written suffix.
    for pos, token in enumerate(words):
        if len(token) > 3 and token[:3] == '[Y]':
            words[pos] = ent_form + token[3:]

    return words
def fil_y(words, ent_form, ent_number, is_human):
    """Fill the object placeholders of a template with ``ent_form``.

    First, the placeholders ``[Y]``, ``[Y.IN+ESS]``, ``[Y.IN+ABL]``,
    ``[Y.ON+ESS]``, ``[Y.DAT]``, ``[Y.ACC]``, ``[Y.INST]`` are tried in that
    order and only the first one present in ``words`` is replaced.  ``[Y]``
    gets ``ent_form`` unchanged.  The case-marked ones get the first result
    of ``inflect(..., language='hun')``, unless ``some_roman_chars(ent_form)``
    is truthy or ``ent_form`` is all upper-case, in which case the form is
    inserted uninflected.

    Afterwards every token of the shape ``[Y]<suffix>`` is rewritten to
    ``ent_form + <suffix>``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag appended to the inflection request.
        is_human: accepted for signature compatibility; not read here.

    Returns:
        The same ``words`` list, after substitution.
    """
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    # (placeholder, case tag); None marks the bare, never-inflected slot.
    slots = (
        ('[Y]', None),
        ('[Y.IN+ESS]', 'IN+ESS'),
        ('[Y.IN+ABL]', 'IN+ABL'),
        ('[Y.ON+ESS]', 'ON+ESS'),
        ('[Y.DAT]', 'DAT'),
        ('[Y.ACC]', 'ACC'),
        ('[Y.INST]', 'INST'),
    )

    for slot, case in slots:
        if slot not in words:
            continue
        pos = words.index(slot)
        if case is None or do_not_inflect:
            words[pos] = ent_form
        else:
            words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                 language='hun')[0]
        break

    # Placeholders that carry a fixed, hand-written suffix.
    for pos, token in enumerate(words):
        if len(token) > 3 and token[:3] == '[Y]':
            words[pos] = ent_form + token[3:]

    return words
def fil_x(words, ent_form, ent_gender):
    """Fill the subject placeholder and gender-agreeing tokens of a template.

    The placeholders ``[X]``, ``[X.Nom]``, ``[X.Masc.Nom]``, ``[X.Gen]``,
    ``[X.Ess]`` are tried in that order and only the first one present in
    ``words`` is replaced.  The first three get ``ent_form`` unchanged
    (``[X.Masc.Nom]`` additionally forces masculine agreement).  ``[X.Gen]``
    and ``[X.Ess]`` get the first result of ``inflect(..., language='rus')``
    with singular number, unless ``some_roman_chars(ent_form)`` is truthy or
    ``ent_form`` is all upper-case, in which case the form is inserted
    uninflected.

    Afterwards every token that starts with ``[``, contains ``X-Gender`` and
    contains ``|`` is treated as a ``|``-separated list of options ordered
    masculine, feminine, neuter.  The option matching ``ent_gender``
    (``"MASC"``, ``"FEM"``, ``"NEUT"``; anything else falls back to the first
    option) is selected and the text before its first ``;`` is substituted.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity, compared against the
            upper-case tags listed above.

    Returns:
        The same ``words`` list, after substitution.
    """
    ent_number = "SG"
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    # The entity form is used as-is for the nominative placeholders.
    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    elif '[X.Nom]' in words:
        words[words.index('[X.Nom]')] = ent_form
    elif '[X.Masc.Nom]' in words:
        words[words.index('[X.Masc.Nom]')] = ent_form
        # Spelled like the tags tested below.  The previous "Masc" spelling
        # only produced the masculine form through the fallback branch.
        ent_gender = "MASC"
    else:
        for slot, case in (('[X.Gen]', 'GEN'), ('[X.Ess]', 'ESS')):
            if slot not in words:
                continue
            pos = words.index(slot)
            if do_not_inflect:
                words[pos] = ent_form
            else:
                words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                     language='rus')[0]
            break

    # Gender-agreeing tokens; unknown genders fall back to the first option.
    option_index = {"MASC": 0, "FEM": 1, "NEUT": 2}.get(ent_gender, 0)
    for i, w in enumerate(words):
        # startswith() instead of w[0] so that an empty token does not raise
        # IndexError.
        if w.startswith('[') and 'X-Gender' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            words[i] = options[option_index].strip().split(';')[0]

    return words
def fil_y(words, ent_form, ent_gender):
    """Fill the object placeholder and gender-agreeing tokens of a template.

    The placeholders ``[Y]``, ``[Y.Nom]``, ``[Y.Gen]``, ``[Y.Acc]``,
    ``[Y.Dat]``, ``[Y.Ess]``, ``[Y.Ins]`` are tried in that order and only
    the first one present in ``words`` is replaced.  ``[Y]`` and ``[Y.Nom]``
    get ``ent_form`` unchanged.  The others get the first result of
    ``inflect(..., language='rus')`` with singular number, unless
    ``some_roman_chars(ent_form)`` is truthy or ``ent_form`` is all
    upper-case, in which case the form is inserted uninflected.

    Afterwards every token that starts with ``[`` and contains ``Y-Gender``
    is resolved:

    * with a ``|`` it is a list of options ordered masculine, feminine,
      neuter; the option matching ``ent_gender`` (``"MASC"``, ``"FEM"``,
      ``"NEUT"``) is selected and the text before its first ``;`` is
      substituted.  Other gender values leave the token untouched.
    * without a ``|`` the text between ``[`` and the first ``.`` is taken as
      a lemma and inflected for ``ent_gender``: as a past-tense verb when the
      token contains ``Pst``, as an adjective when it contains ``Lgspec1``.
      Tokens matching neither are left untouched.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity.

    Returns:
        The same ``words`` list, after substitution.
    """
    ent_number = "SG"
    do_not_inflect = bool(some_roman_chars(ent_form) or ent_form.isupper())

    # (placeholder, case tag); None marks slots that take the form as-is.
    slots = (
        ('[Y]', None),
        ('[Y.Nom]', None),
        ('[Y.Gen]', 'GEN'),
        ('[Y.Acc]', 'ACC'),
        ('[Y.Dat]', 'DAT'),
        ('[Y.Ess]', 'ESS'),
        ('[Y.Ins]', 'INS'),
    )
    for slot, case in slots:
        if slot not in words:
            continue
        pos = words.index(slot)
        if case is None or do_not_inflect:
            words[pos] = ent_form
        else:
            words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                 language='rus')[0]
        break

    option_index = {"MASC": 0, "FEM": 1, "NEUT": 2}
    for i, w in enumerate(words):
        # startswith() instead of w[0] so that an empty token does not raise
        # IndexError.
        if not (w.startswith('[') and 'Y-Gender' in w):
            continue
        if '|' in w:
            if ent_gender in option_index:
                options = w.strip()[1:-1].split('|')
                words[i] = options[option_index[ent_gender]].strip().split(';')[0]
        else:
            # The lemma was previously read without ever being defined
            # (NameError), and the result was assigned even when no branch
            # had produced one.  Derive the lemma from the placeholder and
            # only overwrite the token when a form was actually generated.
            lemma = w.strip()[1:-1].split('.')[0]
            if "Pst" in w:
                words[i] = inflect(lemma, f"V;PST;SG;{ent_gender}",
                                   language='rus')[0]
            elif "Lgspec1" in w:
                words[i] = inflect(lemma, f"ADJ;{ent_gender};SG;LGSPEC1",
                                   language='rus')[0]

    return words
def fil_x(words, ent_form, ent_gender, ent_number, article):
    """Fill the subject placeholder, its articles and agreeing verbs.

    Steps, all applied in place to ``words``:

    1. A form ending in ``ες`` overrides the arguments with plural number
       and feminine gender.
    2. The placeholders ``[X]``, ``[X.Nom]``, ``[X.Gen]``, ``[X.Acc]`` are
       tried in that order and only the first one present is replaced.  The
       first two get ``ent_form`` unchanged.  The other two get the first
       result of ``inflect(..., language='ell2')`` unless the form is exempt
       from inflection: ``some_roman_chars(ent_form)`` is truthy, the form is
       all upper-case, or it ends in one of the listed consonants.
    3. ``[DEF;X]``, ``[DEF.Gen;X]`` and ``[PREPDEF;X]`` are replaced by
       looking up ``article`` under ``ART;<type>;<gender>;<number>;<case>``.
       The case is the one chosen in step 2 (``GEN`` is forced for
       ``[DEF.Gen;X]``).
    4. Every token that starts with ``[``, contains ``X-Number`` and contains
       ``|`` is a pair of options; the first is used for ``"SG"``, the second
       otherwise, keeping the text before the first ``;``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag used in the article keys.
        ent_number: number tag used for inflection, article keys and
            agreement.
        article: mapping from article keys (see step 3) to article forms.

    Returns:
        The same ``words`` list, after substitution.

    Raises:
        KeyError: if a required article key is missing from ``article``.
    """
    # NOTE(review): the sibling fil_y also treats the accented ending "ές"
    # as feminine plural; confirm whether that should apply here as well.
    if ent_form[-2:] == "ες":
        ent_number = "PL"
        ent_gender = "FEM"

    final_consonants = (
        'β', 'γ', 'δ', 'ζ', 'κ', 'λ', 'μ', 'ν', 'ξ', 'π', 'ρ', 'τ', 'φ', 'χ',
        'ψ',
    )
    # endswith() rather than ent_form[-1] so an empty form cannot raise
    # IndexError.
    do_not_inflect = bool(
        some_roman_chars(ent_form)
        or ent_form.isupper()
        or ent_form.endswith(final_consonants))

    # Nominative is the default case.  Initialising it here prevents an
    # UnboundLocalError when an article placeholder is present but none of
    # the [X...] placeholders below is.
    ent_case = "NOM"
    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    elif '[X.Nom]' in words:
        words[words.index('[X.Nom]')] = ent_form
    else:
        for slot, case in (('[X.Gen]', 'GEN'), ('[X.Acc]', 'ACC')):
            if slot not in words:
                continue
            pos = words.index(slot)
            if do_not_inflect:
                words[pos] = ent_form
            else:
                words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                     language='ell2')[0]
            ent_case = case
            break

    # Articles that have to agree with the entity.
    article_slots = (
        ('[DEF;X]', f"ART;DEF;{ent_gender};{ent_number};{ent_case}"),
        ('[DEF.Gen;X]', f"ART;DEF;{ent_gender};{ent_number};GEN"),
        ('[PREPDEF;X]', f"ART;PREPDEF;{ent_gender};{ent_number};{ent_case}"),
    )
    for slot, key in article_slots:
        if slot in words:
            words[words.index(slot)] = article[key]

    # Verbs that have to agree in number with the subject.
    option_index = 0 if ent_number == "SG" else 1
    for i, w in enumerate(words):
        # startswith() instead of w[0] so that an empty token does not raise
        # IndexError.
        if w.startswith('[') and 'X-Number' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            words[i] = options[option_index].strip().split(';')[0]

    return words
def fil_y(words, ent_form, ent_gender, ent_number, article):
    """Fill the object placeholder and its agreeing articles.

    Steps, all applied in place to ``words``:

    1. A form ending in ``ες`` or ``ές`` overrides the arguments with plural
       number and feminine gender.
    2. The placeholders ``[Y]``, ``[Y.Nom]``, ``[Y.Gen]``, ``[Y.Acc]`` are
       tried in that order and only the first one present is replaced.  The
       first two get ``ent_form`` unchanged.  The other two get the first
       result of ``inflect(..., language='ell2')`` unless the form is exempt
       from inflection: ``some_roman_chars(ent_form)`` is truthy, the form is
       all upper-case, or it ends in one of the listed consonants.
    3. ``[DEF;Y]``, ``[DEF.Gen;Y]``, ``[PREPDEF;Y]`` and ``[INDEF;Y]`` are
       replaced by looking up ``article`` under
       ``ART;<type>;<gender>;<number>;<case>``.  The case is the one chosen
       in step 2 (``GEN`` is forced for ``[DEF.Gen;Y]``).  ``[DEF;Y.Fem]`` is
       looked up under ``ART;DEF;FEM;<number>``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag used in the article keys.
        ent_number: number tag used for inflection and article keys.
        article: mapping from article keys (see step 3) to article forms.

    Returns:
        The same ``words`` list, after substitution.

    Raises:
        KeyError: if a required article key is missing from ``article``.
    """
    if ent_form[-2:] in ("ες", "ές"):
        ent_number = "PL"
        ent_gender = "FEM"

    final_consonants = (
        'β', 'γ', 'δ', 'ζ', 'κ', 'λ', 'μ', 'ν', 'ξ', 'π', 'ρ', 'τ', 'φ', 'χ',
        'ψ',
    )
    # endswith() rather than ent_form[-1] so an empty form cannot raise
    # IndexError.
    do_not_inflect = bool(
        some_roman_chars(ent_form)
        or ent_form.isupper()
        or ent_form.endswith(final_consonants))

    # Nominative is the default case.  Initialising it here prevents an
    # UnboundLocalError when an article placeholder is present but none of
    # the [Y...] placeholders below is.
    ent_case = "NOM"
    if '[Y]' in words:
        words[words.index('[Y]')] = ent_form
    elif '[Y.Nom]' in words:
        words[words.index('[Y.Nom]')] = ent_form
    else:
        for slot, case in (('[Y.Gen]', 'GEN'), ('[Y.Acc]', 'ACC')):
            if slot not in words:
                continue
            pos = words.index(slot)
            if do_not_inflect:
                words[pos] = ent_form
            else:
                words[pos] = inflect(ent_form, f"N;{case};{ent_number}",
                                     language='ell2')[0]
            ent_case = case
            break

    # Articles that have to agree with the entity.
    # NOTE(review): the "[DEF;Y.Fem]" key has no case component, unlike the
    # other article keys; confirm that `article` really contains such keys.
    article_slots = (
        ('[DEF;Y]', f"ART;DEF;{ent_gender};{ent_number};{ent_case}"),
        ('[DEF.Gen;Y]', f"ART;DEF;{ent_gender};{ent_number};GEN"),
        ('[PREPDEF;Y]', f"ART;PREPDEF;{ent_gender};{ent_number};{ent_case}"),
        ('[INDEF;Y]', f"ART;INDEF;{ent_gender};{ent_number};{ent_case}"),
        ('[DEF;Y.Fem]', f"ART;DEF;FEM;{ent_number}"),
    )
    for slot, key in article_slots:
        if slot in words:
            words[words.index(slot)] = article[key]

    return words
help='output file path', default=None) # misc arguments parser.add_argument('--force-download', help='force download of models', action='store_true') args = parser.parse_args() # set output file path if args.output is None: output_file_path = 'text_file.out' else: output_file_path = args.output language = args.language print(inflect("love", "V;3;SG", language=language)) print(inflect("love", "V;NFIN", language=language)) print(inflect("love", "V;V.PTCP;PRS", language=language)) print(inflect("drink", "V;3;SG", language=language)) print(inflect("drink", "V;NFIN", language=language)) print(inflect("drink", "V;V.PTCP;PRS", language=language)) print(inflect("drink", "N;3;SG", language=language)) print(inflect("αντίο", "V;3;SG", language=language)) language = 'ell' print(inflect("Βέλγιο", "N;NOM;PL", language=language)) print(inflect("Βέλγιο", "N;NEUT;GEN;SG", language=language)) print(inflect("βέλγικη", "ADJ;FEM;GEN;SG", language=language)) print(inflect("ανταγωνιστικότητα", "N;ACC;PL", language=language)) print( inflect(["βλέπω", "ακούω"], ["V;3;SG;IPFV;PRS", "V;PFV;PST;3;PL"],
# Remaining command-line options; `parser` is created before this point.
parser.add_argument(
    '-t',
    '--testfile',
    help='file to test on (in Unimorph format) | default: <empty>',
    default='')
parser.add_argument('-o',
                    '--output',
                    help='output file path',
                    default=None)
# misc arguments
parser.add_argument('--force-download',
                    help='force download of models',
                    action='store_true')
args = parser.parse_args()

# set output file path
if args.output is None:
    output_file_path = 'text_file.out'
else:
    output_file_path = args.output

language = args.language

# Inflect every lemma of the test file and report exact-match accuracy
# against the expected forms.
try:
    inputs, outputs, tags = simple_read_data(args.testfile)
    curr_out = inflect(inputs, tags, language=language)
    correct = [o == c for o, c in zip(outputs, curr_out)]
    print(f"Accuracy: {float(sum(correct))/len(correct)}")
except Exception as exc:
    # Still best-effort, but no longer a bare `except:` - Ctrl-C and
    # SystemExit propagate, and the reason for the failure (missing test
    # file, empty data, model error, ...) is reported instead of hidden.
    print("dangit")
    print(f"{type(exc).__name__}: {exc}")