예제 #1
0
def fil_x(words, ent_form, ent_number):
    """Replace the X placeholder in ``words`` with the entity form.

    At most one placeholder is filled: the first one present, checked in
    the order ``[X]``, ``[X.Loc]``, ``[X.Gen]``, ``[X.Acc]``, ``[X.Dat]``,
    ``[X.Abl]``.  ``[X]`` receives ``ent_form`` unchanged; the case-marked
    placeholders receive the first element returned by ``inflect`` (called
    with ``language='tur'``).  Afterwards a ``[X;be]`` token, if present, is
    replaced by ``add_be(ent_form, ent_number)``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag inserted into the inflection tag string.

    Returns:
        The same ``words`` list that was passed in.
    """
    # Positions of upper-case characters in the entity.  When any exist, the
    # lower-cased form is inflected and the result goes through fix_up()
    # together with these positions.
    upper_positions = [pos for pos, ch in enumerate(ent_form) if ch.isupper()]
    base_form = ent_form.lower() if upper_positions else ent_form

    case_placeholders = (
        ("[X.Loc]", "LOC"),
        ("[X.Gen]", "GEN"),
        ("[X.Acc]", "ACC"),
        ("[X.Dat]", "DAT"),
        ("[X.Abl]", "ABL"),
    )

    if '[X]' in words:
        slot = words.index('[X]')
        words[slot] = ent_form
    else:
        for placeholder, case_tag in case_placeholders:
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            inflected = inflect(base_form, f"N;{case_tag};{ent_number}",
                                language='tur')[0]
            if upper_positions:
                inflected = fix_up(inflected, upper_positions)
            words[slot] = inflected
            # Only the first matching case placeholder is handled.
            break

    if '[X;be]' in words:
        be_slot = words.index('[X;be]')
        words[be_slot] = add_be(ent_form, ent_number)

    return words
예제 #2
0
def fil_x(words, ent_form, ent_number, is_human):
    """Fill the X placeholders of a template with the entity form.

    Three passes are made over ``words`` (which is modified in place):

    1. The first placeholder present among ``[X]``, ``[X.LOC]``, ``[X.GEN]``
       (checked in that order) is replaced.  ``[X]`` gets ``ent_form``
       unchanged; the case-marked ones get the first element returned by
       ``inflect`` (``language='ben'``) unless the entity is flagged as
       not-to-be-inflected, in which case ``ent_form`` is used as is.
    2. Every token of the form ``[X]<suffix>`` becomes ``ent_form + <suffix>``.
    3. Every token that starts with ``[``, contains ``human`` and contains
       ``|`` is treated as ``[<label>:<form>|<label>:<form>]``; the first
       option's form is chosen when ``is_human`` is truthy, the second one
       otherwise.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag inserted into the inflection tag string.
        is_human: selects the agreement option in pass 3.

    Returns:
        The same ``words`` list that was passed in.
    """
    # The cheap str check runs first; the boolean result is the same as
    # testing some_roman_chars() first.
    do_not_inflect = ent_form.isupper() or some_roman_chars(ent_form)

    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    else:
        for placeholder, case_tag in (("[X.LOC]", "LOC"), ("[X.GEN]", "GEN")):
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if do_not_inflect:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='ben')[0]
            # Only the first matching case placeholder is handled.
            break

    # Placeholders that carry a fixed suffix, e.g. "[X]abc".
    for i, w in enumerate(words):
        if w.startswith('[X]') and len(w) > 3:
            words[i] = ent_form + w[3:]

    # Agreement tokens, needed for subject-verb agreement.
    # startswith() is used instead of w[0] so that empty tokens are skipped
    # rather than raising IndexError.
    for i, w in enumerate(words):
        if w.startswith('[') and 'human' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            chosen = options[0] if is_human else options[1]
            words[i] = chosen.strip().split(':')[1]

    return words
예제 #3
0
def fil_x(words, ent_form, ent_number, is_human):
    """Replace the X placeholder in ``words`` with the entity form.

    At most one placeholder is filled: the first one present, checked in
    the order ``[X]``, ``[X.ACC]``, ``[X.DAT]``, ``[X.ON+ESS]``.  ``[X]``
    always receives ``ent_form`` unchanged.  The case-marked placeholders
    receive the first element returned by ``inflect`` (``language='hun'``),
    unless ``some_roman_chars(ent_form)`` is truthy or ``ent_form`` is all
    upper case, in which case ``ent_form`` is inserted unchanged.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag inserted into the inflection tag string.
        is_human: accepted but not used by this function.

    Returns:
        The same ``words`` list that was passed in.
    """
    keep_as_is = some_roman_chars(ent_form) or ent_form.isupper()

    # Plain placeholder: nothing to inflect, we are done.
    if '[X]' in words:
        slot = words.index('[X]')
        words[slot] = ent_form
        return words

    case_placeholders = (
        ("[X.ACC]", "ACC"),
        ("[X.DAT]", "DAT"),
        ("[X.ON+ESS]", "ON+ESS"),
    )
    for placeholder, case_tag in case_placeholders:
        if placeholder not in words:
            continue
        slot = words.index(placeholder)
        if keep_as_is:
            words[slot] = ent_form
        else:
            words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                  language='hun')[0]
        # Only the first matching case placeholder is handled.
        break

    return words
예제 #4
0
def fil_y(words, ent_form, ent_number, is_human):
    """Replace the Y placeholders in ``words`` with the entity form.

    First, at most one placeholder is filled: the first one present,
    checked in the order ``[Y]``, ``[Y.LOC]``, ``[Y.GEN]``.  ``[Y]`` always
    receives ``ent_form`` unchanged.  The case-marked placeholders receive
    the first element returned by ``inflect`` (``language='ben'``), unless
    ``some_roman_chars(ent_form)`` is truthy or ``ent_form`` is all upper
    case, in which case ``ent_form`` is inserted unchanged.

    Second, every token of the form ``[Y]<suffix>`` becomes
    ``ent_form + <suffix>``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag inserted into the inflection tag string.
        is_human: accepted but not used by this function.

    Returns:
        The same ``words`` list that was passed in.
    """
    keep_as_is = some_roman_chars(ent_form) or ent_form.isupper()

    if '[Y]' in words:
        slot = words.index('[Y]')
        words[slot] = ent_form
    else:
        for placeholder, case_tag in (("[Y.LOC]", "LOC"), ("[Y.GEN]", "GEN")):
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if keep_as_is:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='ben')[0]
            # Only the first matching case placeholder is handled.
            break

    # Placeholders followed by a fixed suffix, e.g. "[Y]abc".
    for pos, token in enumerate(words):
        if len(token) > 3 and token[:3] == '[Y]':
            words[pos] = ent_form + token[3:]

    return words
예제 #5
0
def fil_y(words, ent_form, ent_number, is_human):
    """Replace the Y placeholders in ``words`` with the entity form.

    First, at most one placeholder is filled: the first one present,
    checked in the order ``[Y]``, ``[Y.IN+ESS]``, ``[Y.IN+ABL]``,
    ``[Y.ON+ESS]``, ``[Y.DAT]``, ``[Y.ACC]``, ``[Y.INST]``.  ``[Y]`` always
    receives ``ent_form`` unchanged.  The case-marked placeholders receive
    the first element returned by ``inflect`` (``language='hun'``), unless
    ``some_roman_chars(ent_form)`` is truthy or ``ent_form`` is all upper
    case, in which case ``ent_form`` is inserted unchanged.

    Second, every token of the form ``[Y]<suffix>`` becomes
    ``ent_form + <suffix>``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_number: number tag inserted into the inflection tag string.
        is_human: accepted but not used by this function.

    Returns:
        The same ``words`` list that was passed in.
    """
    keep_as_is = some_roman_chars(ent_form) or ent_form.isupper()

    # Checked in this exact order; the first hit wins.
    case_placeholders = (
        ("[Y.IN+ESS]", "IN+ESS"),
        ("[Y.IN+ABL]", "IN+ABL"),
        ("[Y.ON+ESS]", "ON+ESS"),
        ("[Y.DAT]", "DAT"),
        ("[Y.ACC]", "ACC"),
        ("[Y.INST]", "INST"),
    )

    if '[Y]' in words:
        slot = words.index('[Y]')
        words[slot] = ent_form
    else:
        for placeholder, case_tag in case_placeholders:
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if keep_as_is:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='hun')[0]
            break

    # Placeholders followed by a fixed suffix, e.g. "[Y]abc".
    for pos, token in enumerate(words):
        if len(token) > 3 and token[:3] == '[Y]':
            words[pos] = ent_form + token[3:]

    return words
예제 #6
0
def fil_x(words, ent_form, ent_gender):
    """Fill the X placeholders of a template with the entity form.

    Two passes are made over ``words`` (which is modified in place):

    1. The first placeholder present among ``[X]``, ``[X.Nom]``,
       ``[X.Masc.Nom]``, ``[X.Gen]``, ``[X.Ess]`` (checked in that order)
       is replaced.  The nominative variants get ``ent_form`` unchanged;
       ``[X.Masc.Nom]`` additionally forces the gender to masculine.  The
       genitive/essive ones get the first element returned by ``inflect``
       (``language='rus'``, number fixed to ``SG``) unless the entity is
       flagged as not-to-be-inflected.
    2. Every token that starts with ``[``, contains ``X-Gender`` and
       contains ``|`` is treated as a ``|``-separated list of options
       ordered masculine, feminine, neuter.  The option matching
       ``ent_gender`` (``"MASC"``, ``"FEM"``, ``"NEUT"``; anything else
       falls back to the first option) is selected and the text before its
       first ``;`` is inserted.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity.

    Returns:
        The same ``words`` list that was passed in.
    """
    ent_number = "SG"

    # The cheap str check runs first; the boolean result is the same as
    # testing some_roman_chars() first.
    do_not_inflect = ent_form.isupper() or some_roman_chars(ent_form)

    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    elif "[X.Nom]" in words:
        # The entity form is inserted as is for the nominative.
        words[words.index('[X.Nom]')] = ent_form
    elif "[X.Masc.Nom]" in words:
        words[words.index('[X.Masc.Nom]')] = ent_form
        # Spelled "MASC" to match the comparisons in the agreement pass
        # below (the previous "Masc" only worked via the fallback branch).
        ent_gender = "MASC"
    else:
        for placeholder, case_tag in (("[X.Gen]", "GEN"), ("[X.Ess]", "ESS")):
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if do_not_inflect:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='rus')[0]
            # Only the first matching case placeholder is handled.
            break

    # Gender-agreeing tokens (e.g. verbs), if they exist.  Unknown genders
    # fall back to the first option.
    option_by_gender = {"MASC": 0, "FEM": 1, "NEUT": 2}
    for i, w in enumerate(words):
        # startswith() instead of w[0]: empty tokens are skipped rather
        # than raising IndexError.
        if w.startswith('[') and 'X-Gender' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            chosen = options[option_by_gender.get(ent_gender, 0)]
            words[i] = chosen.strip().split(';')[0]

    return words
예제 #7
0
def fil_y(words, ent_form, ent_gender):
    """Fill the Y placeholders of a template with the entity form.

    Two passes are made over ``words`` (which is modified in place):

    1. The first placeholder present among ``[Y]``, ``[Y.Nom]``,
       ``[Y.Gen]``, ``[Y.Acc]``, ``[Y.Dat]``, ``[Y.Ess]``, ``[Y.Ins]``
       (checked in that order) is replaced.  The nominative variants get
       ``ent_form`` unchanged; the others get the first element returned by
       ``inflect`` (``language='rus'``, number fixed to ``SG``) unless the
       entity is flagged as not-to-be-inflected.
    2. Every token that starts with ``[`` and contains ``Y-Gender`` is
       resolved for gender agreement:

       * with ``|`` it is a list of options ordered masculine, feminine,
         neuter; the option matching ``ent_gender`` (``"MASC"``, ``"FEM"``,
         ``"NEUT"``) is selected and the text before its first ``;`` is
         inserted.  Any other gender leaves the token untouched.
       * without ``|`` the token is inflected via ``inflect`` when it
         contains ``Pst`` or ``Lgspec1``; otherwise it is left untouched.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity.

    Returns:
        The same ``words`` list that was passed in.
    """
    ent_number = "SG"

    # The cheap str check runs first; the boolean result is the same as
    # testing some_roman_chars() first.
    do_not_inflect = ent_form.isupper() or some_roman_chars(ent_form)

    case_placeholders = (
        ("[Y.Gen]", "GEN"),
        ("[Y.Acc]", "ACC"),
        ("[Y.Dat]", "DAT"),
        ("[Y.Ess]", "ESS"),
        ("[Y.Ins]", "INS"),
    )

    if '[Y]' in words:
        words[words.index('[Y]')] = ent_form
    elif "[Y.Nom]" in words:
        # The entity form is inserted as is for the nominative.
        words[words.index('[Y.Nom]')] = ent_form
    else:
        for placeholder, case_tag in case_placeholders:
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if do_not_inflect:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='rus')[0]
            # Only the first matching case placeholder is handled.
            break

    # Gender-agreeing tokens, if they exist.
    option_by_gender = {"MASC": 0, "FEM": 1, "NEUT": 2}
    for i, w in enumerate(words):
        # startswith() instead of w[0]: empty tokens are skipped rather
        # than raising IndexError.
        if not (w.startswith('[') and 'Y-Gender' in w):
            continue
        if '|' in w:
            options = w.strip()[1:-1].split('|')
            option_index = option_by_gender.get(ent_gender)
            if option_index is not None:
                words[i] = options[option_index].strip().split(';')[0]
        else:
            # Previously this part ran unconditionally with an undefined
            # `lemma` and a possibly unbound `form2`, so every Y-Gender
            # token raised.  It now only handles tokens without explicit
            # options and only assigns when a form was actually computed.
            # NOTE(review): the lemma is assumed to be the text before the
            # first '.' inside the brackets -- confirm against the
            # template format.
            lemma = w.strip()[1:-1].split('.')[0]
            if "Pst" in w:
                words[i] = inflect(lemma, f"V;PST;SG;{ent_gender}",
                                   language='rus')[0]
            elif "Lgspec1" in w:
                words[i] = inflect(lemma, f"ADJ;{ent_gender};SG;LGSPEC1",
                                   language='rus')[0]

    return words
예제 #8
0
def fil_x(words, ent_form, ent_gender, ent_number, article):
    """Fill the X placeholders, articles and agreement tokens of a template.

    Steps, all modifying ``words`` in place:

    1. If ``ent_form`` ends in ``"ες"`` the number is overridden to ``PL``
       and the gender to ``FEM``.
    2. The first placeholder present among ``[X]``, ``[X.Nom]``,
       ``[X.Gen]``, ``[X.Acc]`` (checked in that order) is replaced.  The
       nominative variants get ``ent_form`` unchanged; the others get the
       first element returned by ``inflect`` (``language='ell2'``) unless
       the entity is flagged as not-to-be-inflected (all upper case, ends
       in one of the listed consonants, or ``some_roman_chars`` is truthy).
    3. ``[DEF;X]``, ``[DEF.Gen;X]`` and ``[PREPDEF;X]`` are replaced by a
       lookup in ``article`` keyed on gender, number and case.
    4. Every token that starts with ``[``, contains ``X-Number`` and
       contains ``|`` is treated as ``singular|plural`` options; the text
       before the first ``;`` of the matching option is inserted.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity.
        ent_number: number tag of the entity (``"SG"`` selects the first
            agreement option, anything else the second).
        article: mapping from ``"ART;<type>;<gender>;<number>;<case>"``
            keys to article strings.

    Returns:
        The same ``words`` list that was passed in.

    Raises:
        KeyError: if a required key is missing from ``article``.
    """
    if ent_form[-2:] == "ες":
        ent_number = "PL"
        ent_gender = "FEM"

    uninflectable_endings = (
        'β', 'γ', 'δ', 'ζ', 'κ', 'λ', 'μ', 'ν', 'ξ', 'π', 'ρ', 'τ', 'φ',
        'χ', 'ψ',
    )
    # endswith() (instead of ent_form[-1]) tolerates an empty entity form.
    # The cheap str checks run first; the boolean result is unchanged.
    do_not_inflect = (ent_form.isupper()
                      or ent_form.endswith(uninflectable_endings)
                      or some_roman_chars(ent_form))

    # Default to nominative so that the article lookups below do not hit an
    # unbound variable when the template has an article placeholder but no
    # [X...] placeholder.
    ent_case = "NOM"
    if '[X]' in words:
        words[words.index('[X]')] = ent_form
    elif "[X.Nom]" in words:
        # The entity form is inserted as is for the nominative.
        words[words.index('[X.Nom]')] = ent_form
    else:
        for placeholder, case_tag in (("[X.Gen]", "GEN"), ("[X.Acc]", "ACC")):
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if do_not_inflect:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='ell2')[0]
            ent_case = case_tag
            # Only the first matching case placeholder is handled.
            break

    # Corresponding articles, if they exist.
    if "[DEF;X]" in words:
        i = words.index('[DEF;X]')
        words[i] = article[f"ART;DEF;{ent_gender};{ent_number};{ent_case}"]
    if "[DEF.Gen;X]" in words:
        i = words.index('[DEF.Gen;X]')
        words[i] = article[f"ART;DEF;{ent_gender};{ent_number};GEN"]
    if "[PREPDEF;X]" in words:
        i = words.index('[PREPDEF;X]')
        words[i] = article[f"ART;PREPDEF;{ent_gender};{ent_number};{ent_case}"]

    # Number-agreeing tokens (needed for subject-verb agreement).
    for i, w in enumerate(words):
        # startswith() instead of w[0]: empty tokens are skipped rather
        # than raising IndexError.
        if w.startswith('[') and 'X-Number' in w and '|' in w:
            options = w.strip()[1:-1].split('|')
            chosen = options[0] if ent_number == "SG" else options[1]
            words[i] = chosen.strip().split(';')[0]

    return words
예제 #9
0
def fil_y(words, ent_form, ent_gender, ent_number, article):
    """Fill the Y placeholders and articles of a template.

    Steps, all modifying ``words`` in place:

    1. If ``ent_form`` ends in ``"ες"`` or ``"ές"`` the number is
       overridden to ``PL`` and the gender to ``FEM``.
    2. The first placeholder present among ``[Y]``, ``[Y.Nom]``,
       ``[Y.Gen]``, ``[Y.Acc]`` (checked in that order) is replaced.  The
       nominative variants get ``ent_form`` unchanged; the others get the
       first element returned by ``inflect`` (``language='ell2'``) unless
       the entity is flagged as not-to-be-inflected (all upper case, ends
       in one of the listed consonants, or ``some_roman_chars`` is truthy).
    3. ``[DEF;Y]``, ``[DEF.Gen;Y]``, ``[PREPDEF;Y]``, ``[INDEF;Y]`` and
       ``[DEF;Y.Fem]`` are replaced by a lookup in ``article``.

    Args:
        words: list of template tokens; modified in place.
        ent_form: surface form of the entity.
        ent_gender: gender tag of the entity.
        ent_number: number tag of the entity.
        article: mapping from ``"ART;<type>;<gender>;<number>[;<case>]"``
            keys to article strings.

    Returns:
        The same ``words`` list that was passed in.

    Raises:
        KeyError: if a required key is missing from ``article``.
    """
    if ent_form[-2:] == "ες" or ent_form[-2:] == "ές":
        ent_number = "PL"
        ent_gender = "FEM"

    uninflectable_endings = (
        'β', 'γ', 'δ', 'ζ', 'κ', 'λ', 'μ', 'ν', 'ξ', 'π', 'ρ', 'τ', 'φ',
        'χ', 'ψ',
    )
    # endswith() (instead of ent_form[-1]) tolerates an empty entity form.
    # The cheap str checks run first; the boolean result is unchanged.
    do_not_inflect = (ent_form.isupper()
                      or ent_form.endswith(uninflectable_endings)
                      or some_roman_chars(ent_form))

    # Default to nominative so that the article lookups below do not hit an
    # unbound variable when the template has an article placeholder but no
    # [Y...] placeholder.
    ent_case = "NOM"
    if '[Y]' in words:
        words[words.index('[Y]')] = ent_form
    elif "[Y.Nom]" in words:
        # The entity form is inserted as is for the nominative.
        words[words.index('[Y.Nom]')] = ent_form
    else:
        for placeholder, case_tag in (("[Y.Gen]", "GEN"), ("[Y.Acc]", "ACC")):
            if placeholder not in words:
                continue
            slot = words.index(placeholder)
            if do_not_inflect:
                words[slot] = ent_form
            else:
                words[slot] = inflect(ent_form, f"N;{case_tag};{ent_number}",
                                      language='ell2')[0]
            ent_case = case_tag
            # Only the first matching case placeholder is handled.
            break

    # Corresponding articles, if they exist.
    if "[DEF;Y]" in words:
        i = words.index('[DEF;Y]')
        words[i] = article[f"ART;DEF;{ent_gender};{ent_number};{ent_case}"]
    if "[DEF.Gen;Y]" in words:
        i = words.index('[DEF.Gen;Y]')
        words[i] = article[f"ART;DEF;{ent_gender};{ent_number};GEN"]
    if "[PREPDEF;Y]" in words:
        i = words.index('[PREPDEF;Y]')
        words[i] = article[f"ART;PREPDEF;{ent_gender};{ent_number};{ent_case}"]
    if "[INDEF;Y]" in words:
        i = words.index('[INDEF;Y]')
        words[i] = article[f"ART;INDEF;{ent_gender};{ent_number};{ent_case}"]
    if "[DEF;Y.Fem]" in words:
        i = words.index('[DEF;Y.Fem]')
        # NOTE(review): unlike the other lookups this key carries no case
        # component -- confirm the article table has such keys.
        words[i] = article[f"ART;DEF;FEM;{ent_number}"]

    return words
예제 #10
0
                        help='output file path',
                        default=None)
    # misc arguments
    parser.add_argument('--force-download',
                        help='force download of models',
                        action='store_true')

    args = parser.parse_args()
    # set output file path
    if args.output is None:
        output_file_path = 'text_file.out'
    else:
        output_file_path = args.output

    language = args.language
    print(inflect("love", "V;3;SG", language=language))
    print(inflect("love", "V;NFIN", language=language))
    print(inflect("love", "V;V.PTCP;PRS", language=language))
    print(inflect("drink", "V;3;SG", language=language))
    print(inflect("drink", "V;NFIN", language=language))
    print(inflect("drink", "V;V.PTCP;PRS", language=language))
    print(inflect("drink", "N;3;SG", language=language))
    print(inflect("αντίο", "V;3;SG", language=language))

    language = 'ell'
    print(inflect("Βέλγιο", "N;NOM;PL", language=language))
    print(inflect("Βέλγιο", "N;NEUT;GEN;SG", language=language))
    print(inflect("βέλγικη", "ADJ;FEM;GEN;SG", language=language))
    print(inflect("ανταγωνιστικότητα", "N;ACC;PL", language=language))
    print(
        inflect(["βλέπω", "ακούω"], ["V;3;SG;IPFV;PRS", "V;PFV;PST;3;PL"],
    parser.add_argument(
        '-t',
        '--testfile',
        help='file to test on (in Unimorph format) | default: <empty>',
        default='')
    parser.add_argument('-o',
                        '--output',
                        help='output file path',
                        default=None)
    # misc arguments
    parser.add_argument('--force-download',
                        help='force download of models',
                        action='store_true')

    args = parser.parse_args()
    # set output file path
    if args.output is None:
        output_file_path = 'text_file.out'
    else:
        output_file_path = args.output

    language = args.language

    try:
        inputs, outputs, tags = simple_read_data(args.testfile)
        curr_out = inflect(inputs, tags, language=language)
        correct = [o == c for o, c in zip(outputs, curr_out)]
        print(f"Accuracy: {float(sum(correct))/len(correct)}")
    except:
        print("dangit")