Пример #1
0
        if vfst_word_class == "[Lp]":
            entry = '[Lp]%s%s%s%s%s:%s%s EtuliitteenJatko_%s;' \
                    % (debug_info, rakenne, alkuWithTags, diacritics, infoFlags, alku, diacritics, get_prefix_jatko(word, altform))
        else:
            entry = '%s%s%s%s%s%s:%s%s %s%s_%s ;' \
                    % (vfst_word_class, debug_info, rakenne, infoFlags, injectBaseformToStructure(outputBaseform, alkuWithTags),
                    diacritics, alku, diacritics, vfst_class_prefix, jatko, vfst_vtype)
        vocabularyFile.write(entry + "\n")

    # Sanity check for alternative forms: if there are both multi part forms and single part forms
    # then all multi part forms must end with a part contained in the single part set.
    if singlePartForms:
        for multiPartForm in multiPartForms:
            lastPart = multiPartForm[max(multiPartForm.rfind(
                "="), multiPartForm.rfind("|"), multiPartForm.rfind("-")) + 1:]
            if lastPart not in singlePartForms:
                sys.stderr.write(
                    "ERROR: suspicious alternative spelling: %s\n" %
                    multiPartForm)
                sys.exit(1)


# Pass the joukahainen.xml word list and the handle_word callback to
# voikkoutils.process_wordlist.
voikkoutils.process_wordlist(
    generate_lex_common.VOCABULARY_DATA + '/joukahainen.xml',
    handle_word,
    True)

# Finish every vocabulary file. The extra line feeds are needed to avoid
# mixed lines in the concatenated lexc file.
for fileSuffix in vocabularyFileSuffixes:
    vocabularyFiles[fileSuffix].write("\n\n")
    vocabularyFiles[fileSuffix].close()
Пример #2
0
		elif vtype == voikkoutils.VOWEL_BOTH: malaga_vtype = 'aä'
		rakenne = generate_lex_common.get_structure(altform, malaga_word_class)
		if baseform is None:
			altBaseform = altform
		else:
			altBaseform = baseform
		if malaga_word_class == "lyhenne":
			perusmuotoEntry = ""
		else:
			perusmuotoEntry = 'perusmuoto: "%s", ' % altBaseform
		entry = '[%salku: "%s", luokka: %s, jatko: %s, äs: %s%s%s%s];' \
		          % (perusmuotoEntry, alku, malaga_word_class, malaga_jatko, malaga_vtype, malaga_flags,
			   generate_lex_common.get_structure(altform, malaga_word_class),
			   additional_attributes)
		generate_lex_common.write_entry(main_vocabulary, {}, word, entry)
	
	# Sanity check for alternative forms: if there are both multi part forms and single part forms
	# then all multi part forms must end with a part contained in the single part set.
	if singlePartForms:
		for multiPartForm in multiPartForms:
			lastPart = multiPartForm[max(multiPartForm.rfind("="), multiPartForm.rfind("|"), multiPartForm.rfind("-")) + 1:]
			if lastPart not in singlePartForms:
				sys.stderr.write("ERROR: suspicious alternative spelling: %s\n" % multiPartForm)
				sys.exit(1)


# Pass the joukahainen.xml word list and the handle_word callback to
# voikkoutils.process_wordlist.
voikkoutils.process_wordlist(
    generate_lex_common.VOCABULARY_DATA + '/joukahainen.xml',
    handle_word,
    True)

# Close main_vocabulary once the word list call has returned.
main_vocabulary.close()