示例#1
0
文件: sukija.py 项目: m5w/corevoikko
def handle_word(main_vocabulary, vocabulary_files, word):
    """Write the Sukija lexicon entries for one vocabulary ``word`` element.

    Words flagged "not_sukija", words whose inflection class is "poikkeava",
    words without an inflection class (unless their first word class is
    "interjection") and words without a malaga word class are skipped.

    For every alternative form one entry string is written with
    ``generate_lex_common.write_entry`` and then handed to
    ``write_word_without_accents``.  When no malaga inflection class is found
    for a form, a "#Malaga class not found" line is written instead.

    Args:
        main_vocabulary: Passed through unchanged to
            ``generate_lex_common.write_entry``.
        vocabulary_files: Passed through unchanged to
            ``generate_lex_common.write_entry``.
        word: DOM element describing the word; it is queried with
            ``getElementsByTagName`` for "infclass", "classes", "inflection"
            and "forms".
    """
    if generate_lex_common.has_flag(word, "not_sukija"):
        return

    # Get the inflection class. Exactly one inflection class is needed.
    # A class marked "historical" takes precedence over the others.
    infclasses = word.getElementsByTagName("infclass")
    voikko_infclass = None
    for infclass in infclasses:
        if infclass.getAttribute("type") == "historical":
            voikko_infclass = generate_lex_common.tValue(infclass)
            break
    if voikko_infclass in (
        u"antautua",
        u"kaihtaa",
        u"laittaa",
        u"paahtaa",
        u"taittaa",
        u"veranta",
        u"vihanta",
        u"virkkaa",
    ):
        voikko_infclass = voikko_infclass + u"-av1"

    # No historical class: fall back to the first non-historical one.
    if voikko_infclass is None:
        for infclass in infclasses:
            if infclass.getAttribute("type") != "historical":
                voikko_infclass = generate_lex_common.tValue(infclass)
                break

    if voikko_infclass == u"poikkeava":
        return

    # Get the word classes
    wordclasses = generate_lex_common.tValues(word.getElementsByTagName("classes")[0], "wclass")
    if wordclasses[0] != u"interjection" and voikko_infclass is None:
        return
    malaga_word_class = generate_lex_common.get_malaga_word_class(wordclasses)
    if malaga_word_class is None:
        return

    # Get malaga flags
    malaga_flags = generate_lex_common.get_malaga_flags(word)

    # Get forced vowel type
    if voikko_infclass is None:
        forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT
    else:
        forced_inflection_vtype = generate_lex_common.vowel_type(word.getElementsByTagName("inflection")[0])

    # Process all alternative forms
    for altform in generate_lex_common.tValues(word.getElementsByTagName("forms")[0], "form"):
        wordform = altform.replace(u"|", u"").replace(u"=", u"")
        # NOTE: this reassignment persists for the remaining alternative
        # forms of the same word.
        if voikko_infclass == u"nuolaista-av2" and wordform in (u"häväistä", u"vavista"):
            voikko_infclass = u"nuolaista"
        (alku, jatko) = generate_lex_common.get_malaga_inflection_class(
            wordform, voikko_infclass, wordclasses, classmap
        )
        if forced_inflection_vtype == voikkoutils.VOWEL_DEFAULT:
            vtype = voikkoutils.get_wordform_infl_vowel_type(altform)
        else:
            vtype = forced_inflection_vtype
        # NOTE(review): there is no fallback branch; presumably vtype is
        # always one of FRONT/BACK/BOTH here -- confirm against voikkoutils.
        if vtype == voikkoutils.VOWEL_FRONT:
            malaga_vtype = u"ä"
        elif vtype == voikkoutils.VOWEL_BACK:
            malaga_vtype = u"a"
        elif vtype == voikkoutils.VOWEL_BOTH:
            malaga_vtype = u"aä"
        malaga_vtype = new_vtype(malaga_vtype, wordform)
        rakenne = generate_lex_common.get_structure(altform, malaga_word_class)
        if alku is None:
            generate_lex_common.write_entry(
                main_vocabulary,
                vocabulary_files,
                word,
                u"#Malaga class not found for (%s, %s)\n" % (wordform, voikko_infclass),
            )
            continue

        # Forms that are not needed in the output.
        if wordform in words:
            continue
        if rx_begin.match(wordform) is not None:
            continue
        if rx_end.match(wordform) is not None:
            continue
        # Some words have two inflection paradigms in the vocabulary; in
        # Sukija the paradigms have been merged, so the second one can be
        # dropped.
        if wordform in (u"ori", u"ripsi", u"sini", u"täti", u"äiti") and jatko == u"risti":
            continue
        if wordform == u"kampi" and jatko == u"sampi":
            continue

        # Adjust the value of the alku field.
        if jatko == u"rakentaa":
            alku = wordform[:-4]

        # Write the entry.
        entry = u'[perusmuoto: "%s", alku: "%s", luokka: %s, jatko: <%s>, äs: %s%s%s];' % (
            wordform,
            alku,
            malaga_word_class,
            jatko,
            malaga_vtype,
            malaga_flags,
            rakenne,
        )
        generate_lex_common.write_entry(main_vocabulary, vocabulary_files, word, entry)

        write_word_without_accents(main_vocabulary, vocabulary_files, word, entry, wordform)
示例#2
0
def handle_word(word):
    """Write the Voikko lexicon entries for one vocabulary ``word`` element.

    The word is skipped when it is flagged "not_voikko" (unless "sukija" is in
    ``OPTIONS["extra-usage"]``), fails ``check_style`` / ``check_usage``, is
    filtered out by the ``OPTIONS["frequency"]`` threshold, has the inflection
    class "poikkeava", or has no malaga word class.  Words without an
    inflection class are only kept when their first word class is one of
    interjection, prefix, abbreviation, conjunction or adverb.

    One entry string per alternative form is written to the module-level
    ``main_vocabulary`` with ``generate_lex_common.write_entry``.

    Args:
        word: DOM element describing the word; it is queried with
            ``getElementsByTagName`` for "infclass", "classes", "baseform",
            "inflection" and "forms", and with ``getAttribute("id")``.

    Raises:
        SystemExit: via ``sys.exit(1)`` when no malaga inflection class is
            found for a form, or when a multi-part alternative form does not
            end in one of the single-part forms.
    """
    # Drop words that are not needed in the Voikko lexicon
    if generate_lex_common.has_flag(word, "not_voikko") and "sukija" not in OPTIONS["extra-usage"]:
        return
    if not check_style(word):
        return
    if not check_usage(word):
        return
    if frequency(word) >= OPTIONS["frequency"] + 1:
        return
    if frequency(word) == OPTIONS["frequency"] and generate_lex_common.has_flag(word, "confusing"):
        return

    # Get the inflection class. Exactly one inflection class is needed
    voikko_infclass = None
    for infclass in word.getElementsByTagName("infclass"):
        if infclass.getAttribute("type") != "historical":
            voikko_infclass = generate_lex_common.tValue(infclass)
            break
    if voikko_infclass == u"poikkeava":
        return

    # Get the word classes
    wordclasses = generate_lex_common.tValues(word.getElementsByTagName("classes")[0], "wclass")
    if (
        wordclasses[0] not in (u"interjection", u"prefix", u"abbreviation", u"conjunction", u"adverb")
        and voikko_infclass is None
    ):
        return
    malaga_word_class = generate_lex_common.get_malaga_word_class(wordclasses)
    if malaga_word_class is None:
        return

    # An explicit <baseform> overrides the alternative form as "perusmuoto".
    baseformTags = word.getElementsByTagName("baseform")
    if len(baseformTags) > 0:
        baseform = generate_lex_common.tValue(baseformTags[0])
    else:
        baseform = None

    # Get malaga flags
    malaga_flags = generate_lex_common.get_malaga_flags(word)

    # Get forced vowel type
    if voikko_infclass is None and malaga_word_class != u"lyhenne":
        forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT
    else:
        inflectionElement = word.getElementsByTagName("inflection")
        if len(inflectionElement) > 0:
            forced_inflection_vtype = generate_lex_common.vowel_type(inflectionElement[0])
        else:
            forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT

    # Construct debug information and additional attributes
    additional_attributes = get_additional_attributes(word)
    if OPTIONS["sourceid"]:
        additional_attributes = additional_attributes + u', sourceid: "%s"' % word.getAttribute("id")

    # Process all alternative forms
    singlePartForms = []
    multiPartForms = []
    for altform in generate_lex_common.tValues(word.getElementsByTagName("forms")[0], "form"):
        wordform = altform.replace(u'|', u'').replace(u'=', u'')
        # Equal lengths mean the form contains none of "|", "=" and "-".
        if len(altform) == len(wordform.replace(u'-', u'')):
            singlePartForms.append(altform)
        else:
            multiPartForms.append(altform)
        (alku, jatko) = generate_lex_common.get_malaga_inflection_class(
            wordform, voikko_infclass, wordclasses, CLASSMAP
        )
        if alku is None:
            errorstr = u"ERROR: Malaga class not found for (%s, %s)\n" % (wordform, voikko_infclass)
            generate_lex_common.write_entry(main_vocabulary, {}, word, errorstr)
            # NOTE(review): the explicit encode presumably targets a
            # byte-oriented stderr; a text-mode stream would reject bytes.
            # The second error path below writes without encoding -- confirm
            # the target interpreter and make both consistent.
            sys.stderr.write(errorstr.encode(u"UTF-8"))
            sys.exit(1)
        if malaga_word_class == u"lyhenne":
            jatko = get_abbreviation_jatko(word, altform)
        elif malaga_word_class == u"seikkasana":
            jatko = get_adverb_jatko(word)
        if malaga_word_class == u"etuliite":
            vtype = voikkoutils.VOWEL_BOTH
            malaga_jatko = get_prefix_jatko(word)
        else:
            if forced_inflection_vtype == voikkoutils.VOWEL_DEFAULT:
                vtype = voikkoutils.get_wordform_infl_vowel_type(altform)
            else:
                vtype = forced_inflection_vtype
            malaga_jatko = u"<" + jatko + u">"
        # NOTE(review): there is no fallback branch; presumably vtype is
        # always one of FRONT/BACK/BOTH here -- confirm against voikkoutils.
        if vtype == voikkoutils.VOWEL_FRONT:
            malaga_vtype = u'ä'
        elif vtype == voikkoutils.VOWEL_BACK:
            malaga_vtype = u'a'
        elif vtype == voikkoutils.VOWEL_BOTH:
            malaga_vtype = u'aä'
        rakenne = generate_lex_common.get_structure(altform, malaga_word_class)
        if baseform is None:
            altBaseform = altform
        else:
            altBaseform = baseform
        # Abbreviations ("lyhenne") get no perusmuoto field.
        if malaga_word_class == u"lyhenne":
            perusmuotoEntry = u""
        else:
            perusmuotoEntry = u'perusmuoto: "%s", ' % altBaseform
        entry = u'[%salku: "%s", luokka: %s, jatko: %s, äs: %s%s%s%s];' % (
            perusmuotoEntry,
            alku,
            malaga_word_class,
            malaga_jatko,
            malaga_vtype,
            malaga_flags,
            rakenne,
            additional_attributes,
        )
        generate_lex_common.write_entry(main_vocabulary, {}, word, entry)

    # Sanity check for alternative forms: if there are both multi part forms and single part forms
    # then all multi part forms must end with a part contained in the single part set.
    if singlePartForms:
        for multiPartForm in multiPartForms:
            # The last part starts right after the rightmost separator; the
            # str method replaces the free function rfind() used previously.
            lastSeparator = max(
                multiPartForm.rfind(u"="),
                multiPartForm.rfind(u"|"),
                multiPartForm.rfind(u"-"),
            )
            lastPart = multiPartForm[lastSeparator + 1:]
            if lastPart not in singlePartForms:
                sys.stderr.write(u"ERROR: suspicious alternative spelling: %s\n" % multiPartForm)
                sys.exit(1)
示例#3
0
def handle_word(word):
    """Write the Voikko lexicon entries for one vocabulary ``word`` element.

    The word is skipped when it is flagged "not_voikko" (unless "sukija" is in
    ``OPTIONS["extra-usage"]``), fails ``check_style`` / ``check_usage``, is
    filtered out by the ``OPTIONS["frequency"]`` threshold, has the inflection
    class "poikkeava", or has no malaga word class.  Words without an
    inflection class are only kept when their first word class is one of
    interjection, prefix, abbreviation, conjunction or adverb.

    One entry string per alternative form is written to the module-level
    ``main_vocabulary`` with ``generate_lex_common.write_entry``.

    Args:
        word: DOM element describing the word; it is queried with
            ``getElementsByTagName`` for "infclass", "classes", "baseform",
            "inflection" and "forms", and with ``getAttribute("id")``.

    Raises:
        SystemExit: via ``sys.exit(1)`` when no malaga inflection class is
            found for a form, or when a multi-part alternative form does not
            end in one of the single-part forms.
    """
    # Drop words that are not needed in the Voikko lexicon
    if generate_lex_common.has_flag(word, "not_voikko") and "sukija" not in OPTIONS["extra-usage"]:
        return
    if not check_style(word):
        return
    if not check_usage(word):
        return
    if frequency(word) >= OPTIONS["frequency"] + 1:
        return
    if frequency(word) == OPTIONS["frequency"] and generate_lex_common.has_flag(word, "confusing"):
        return

    # Get the inflection class. Exactly one inflection class is needed
    voikko_infclass = None
    for infclass in word.getElementsByTagName("infclass"):
        if infclass.getAttribute("type") != "historical":
            voikko_infclass = generate_lex_common.tValue(infclass)
            break
    if voikko_infclass == "poikkeava":
        return

    # Get the word classes
    wordclasses = generate_lex_common.tValues(word.getElementsByTagName("classes")[0], "wclass")
    if (
        wordclasses[0] not in ("interjection", "prefix", "abbreviation", "conjunction", "adverb")
        and voikko_infclass is None
    ):
        return
    malaga_word_class = generate_lex_common.get_malaga_word_class(wordclasses)
    if malaga_word_class is None:
        return

    # An explicit <baseform> overrides the alternative form as "perusmuoto".
    baseformTags = word.getElementsByTagName("baseform")
    if len(baseformTags) > 0:
        baseform = generate_lex_common.tValue(baseformTags[0])
    else:
        baseform = None

    # Get malaga flags
    malaga_flags = generate_lex_common.get_malaga_flags(word)

    # Get forced vowel type
    if voikko_infclass is None and malaga_word_class != "lyhenne":
        forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT
    else:
        inflectionElement = word.getElementsByTagName("inflection")
        if len(inflectionElement) > 0:
            forced_inflection_vtype = generate_lex_common.vowel_type(inflectionElement[0])
        else:
            forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT

    # Construct debug information and additional attributes
    additional_attributes = get_additional_attributes(word)
    if OPTIONS["sourceid"]:
        additional_attributes = additional_attributes + ', sourceid: "%s"' % word.getAttribute("id")

    # Process all alternative forms
    singlePartForms = []
    multiPartForms = []
    for altform in generate_lex_common.tValues(word.getElementsByTagName("forms")[0], "form"):
        wordform = altform.replace('|', '').replace('=', '')
        # Equal lengths mean the form contains none of "|", "=" and "-".
        if len(altform) == len(wordform.replace('-', '')):
            singlePartForms.append(altform)
        else:
            multiPartForms.append(altform)
        (alku, jatko) = generate_lex_common.get_malaga_inflection_class(
            wordform, voikko_infclass, wordclasses, CLASSMAP
        )
        if alku is None:
            errorstr = "ERROR: Malaga class not found for (%s, %s)\n" % (wordform, voikko_infclass)
            generate_lex_common.write_entry(main_vocabulary, {}, word, errorstr)
            # sys.stderr is a text stream: write the str itself.  Passing
            # UTF-8 encoded bytes raises TypeError on Python 3, which would
            # hide this diagnostic behind a traceback.
            sys.stderr.write(errorstr)
            sys.exit(1)
        if malaga_word_class == "lyhenne":
            jatko = get_abbreviation_jatko(word, altform)
        elif malaga_word_class == "seikkasana":
            jatko = get_adverb_jatko(word)
        if malaga_word_class == "etuliite":
            vtype = voikkoutils.VOWEL_BOTH
            malaga_jatko = get_prefix_jatko(word)
        else:
            if forced_inflection_vtype == voikkoutils.VOWEL_DEFAULT:
                vtype = voikkoutils.get_wordform_infl_vowel_type(altform)
            else:
                vtype = forced_inflection_vtype
            malaga_jatko = "<" + jatko + ">"
        # NOTE(review): there is no fallback branch; presumably vtype is
        # always one of FRONT/BACK/BOTH here -- confirm against voikkoutils.
        if vtype == voikkoutils.VOWEL_FRONT:
            malaga_vtype = 'ä'
        elif vtype == voikkoutils.VOWEL_BACK:
            malaga_vtype = 'a'
        elif vtype == voikkoutils.VOWEL_BOTH:
            malaga_vtype = 'aä'
        rakenne = generate_lex_common.get_structure(altform, malaga_word_class)
        if baseform is None:
            altBaseform = altform
        else:
            altBaseform = baseform
        # Abbreviations ("lyhenne") get no perusmuoto field.
        if malaga_word_class == "lyhenne":
            perusmuotoEntry = ""
        else:
            perusmuotoEntry = 'perusmuoto: "%s", ' % altBaseform
        entry = '[%salku: "%s", luokka: %s, jatko: %s, äs: %s%s%s%s];' % (
            perusmuotoEntry,
            alku,
            malaga_word_class,
            malaga_jatko,
            malaga_vtype,
            malaga_flags,
            rakenne,
            additional_attributes,
        )
        generate_lex_common.write_entry(main_vocabulary, {}, word, entry)

    # Sanity check for alternative forms: if there are both multi part forms and single part forms
    # then all multi part forms must end with a part contained in the single part set.
    if singlePartForms:
        for multiPartForm in multiPartForms:
            # The last part starts right after the rightmost separator.
            lastSeparator = max(
                multiPartForm.rfind("="),
                multiPartForm.rfind("|"),
                multiPartForm.rfind("-"),
            )
            lastPart = multiPartForm[lastSeparator + 1:]
            if lastPart not in singlePartForms:
                sys.stderr.write("ERROR: suspicious alternative spelling: %s\n" % multiPartForm)
                sys.exit(1)
示例#4
0
def handle_word(main_vocabulary, vocabulary_files, word):
    """Write the Sukija lexicon entries for one vocabulary ``word`` element.

    Words flagged "not_sukija", words whose inflection class is "poikkeava",
    words without an inflection class (unless their first word class is
    "interjection") and words without a malaga word class are skipped.

    For every alternative form one entry string is written with
    ``generate_lex_common.write_entry`` and then handed to
    ``write_word_without_accents``.  When no malaga inflection class is found
    for a form, a "#Malaga class not found" line is written instead.

    Args:
        main_vocabulary: Passed through unchanged to
            ``generate_lex_common.write_entry``.
        vocabulary_files: Passed through unchanged to
            ``generate_lex_common.write_entry``.
        word: DOM element describing the word; it is queried with
            ``getElementsByTagName`` for "infclass", "classes", "inflection"
            and "forms".
    """
    if generate_lex_common.has_flag(word, "not_sukija"):
        return

    # Get the inflection class. Exactly one inflection class is needed.
    # A class marked "historical" takes precedence over the others.
    infclasses = word.getElementsByTagName("infclass")
    voikko_infclass = None
    for infclass in infclasses:
        if infclass.getAttribute("type") == "historical":
            voikko_infclass = generate_lex_common.tValue(infclass)
            break
    if voikko_infclass in (
            u"antautua", u"kaihtaa", u"laittaa", u"paahtaa", u"taittaa",
            u"veranta", u"vihanta", u"virkkaa"):
        voikko_infclass = voikko_infclass + u"-av1"

    # No historical class: fall back to the first non-historical one.
    if voikko_infclass is None:
        for infclass in infclasses:
            if infclass.getAttribute("type") != "historical":
                voikko_infclass = generate_lex_common.tValue(infclass)
                break

    if voikko_infclass == u"poikkeava":
        return

    # Get the word classes
    wordclasses = generate_lex_common.tValues(
        word.getElementsByTagName("classes")[0], "wclass")
    if wordclasses[0] != u"interjection" and voikko_infclass is None:
        return
    malaga_word_class = generate_lex_common.get_malaga_word_class(wordclasses)
    if malaga_word_class is None:
        return

    # Get malaga flags
    malaga_flags = generate_lex_common.get_malaga_flags(word)

    # Get forced vowel type
    if voikko_infclass is None:
        forced_inflection_vtype = voikkoutils.VOWEL_DEFAULT
    else:
        forced_inflection_vtype = generate_lex_common.vowel_type(
            word.getElementsByTagName("inflection")[0])

    # Process all alternative forms
    for altform in generate_lex_common.tValues(
            word.getElementsByTagName("forms")[0], "form"):
        wordform = altform.replace(u'|', u'').replace(u'=', u'')
        # NOTE: this reassignment persists for the remaining alternative
        # forms of the same word.
        if voikko_infclass == u"nuolaista-av2" and wordform in (u"häväistä", u"vavista"):
            voikko_infclass = u"nuolaista"
        (alku, jatko) = generate_lex_common.get_malaga_inflection_class(
            wordform, voikko_infclass, wordclasses, classmap)
        if forced_inflection_vtype == voikkoutils.VOWEL_DEFAULT:
            vtype = voikkoutils.get_wordform_infl_vowel_type(altform)
        else:
            vtype = forced_inflection_vtype
        # NOTE(review): there is no fallback branch; presumably vtype is
        # always one of FRONT/BACK/BOTH here -- confirm against voikkoutils.
        if vtype == voikkoutils.VOWEL_FRONT:
            malaga_vtype = u'ä'
        elif vtype == voikkoutils.VOWEL_BACK:
            malaga_vtype = u'a'
        elif vtype == voikkoutils.VOWEL_BOTH:
            malaga_vtype = u'aä'
        malaga_vtype = new_vtype(malaga_vtype, wordform)
        rakenne = generate_lex_common.get_structure(altform, malaga_word_class)
        if alku is None:
            generate_lex_common.write_entry(
                main_vocabulary, vocabulary_files, word,
                u"#Malaga class not found for (%s, %s)\n" % (wordform, voikko_infclass))
            continue

        # Forms that are not needed in the output.
        if wordform in words:
            continue
        if rx_begin.match(wordform) is not None:
            continue
        if rx_end.match(wordform) is not None:
            continue
        # Some words have two inflection paradigms in the vocabulary; in
        # Sukija the paradigms have been merged, so the second one can be
        # dropped.
        if wordform in (u'ori', u'ripsi', u'sini', u'täti', u'äiti') and jatko == u'risti':
            continue
        if wordform == u'kampi' and jatko == u'sampi':
            continue

        # Adjust the value of the alku field.
        if jatko == u"rakentaa":
            alku = wordform[:-4]

        # Write the entry.
        entry = u'[perusmuoto: "%s", alku: "%s", luokka: %s, jatko: <%s>, äs: %s%s%s];' \
            % (wordform, alku, malaga_word_class, jatko, malaga_vtype, malaga_flags,
               rakenne)
        generate_lex_common.write_entry(main_vocabulary, vocabulary_files,
                                        word, entry)

        write_word_without_accents(main_vocabulary, vocabulary_files, word,
                                   entry, wordform)