import sukija
import hfconv
import xml.dom.minidom
import codecs
import getopt
import locale

# Take the locale from the environment (empty string = user's default).
# Debugging aid: print locale.getlocale(locale.LC_ALL) to inspect the result.
locale.setlocale(locale.LC_ALL, '')

# First argument handed to every generate_lex_common.open_lex() call below.
path = SUKIJA_LEX

flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# The main lexicon is opened first; after that one lexicon is opened per
# entry of SPECIAL_VOCABULARY and stored under the entry's third field.
main_vocabulary = generate_lex_common.open_lex(path, "joukahainen.lex")
vocabulary_files = {}
for voc in generate_lex_common.SPECIAL_VOCABULARY:
    vocabulary_files[voc[2]] = generate_lex_common.open_lex(path, voc[2])

# The word list stays open after this section: only the lines up to and
# including the opening <wordlist> tag are consumed here.
listfile = open(generate_lex_common.VOCABULARY_DATA + u'/joukahainen.xml', 'r')

while True:
    line = listfile.readline()
    if line == '<wordlist xml:lang="fi">\n':
        break
    if line == '':
        # readline() returns '' only at end of file, so the opening tag
        # was never found.
        sys.stderr.write("Malformed file " + generate_lex_common.VOCABULARY_DATA +
                         "/joukahainen.xml\n")
        sys.exit(1)

wcount = 0
import xml.dom.minidom
import codecs
# NOTE(review): string.rfind exists only in Python 2; this import fails on
# Python 3. Kept because code outside this section may still use it.
from string import rfind

flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")


def frequency(word):
    """Return the frequency class of a word element as an integer.

    The value is int(generate_lex_common.tValue(...)) of the first "fclass"
    child element of *word*. When the word has no "fclass" element the
    default class 7 is returned.
    """
    fclass = word.getElementsByTagName("fclass")
    if not fclass:
        return 7
    return int(generate_lex_common.tValue(fclass[0]))


def check_style(word):
    """Check the style flags of the word according to current options.

    Every flag listed under the word's "style" elements must be present in
    OPTIONS["style"]; the flag "foreignloan" is always accepted.

    Returns True if the word is acceptable, otherwise returns False.
    """
    for styleE in word.getElementsByTagName("style"):
        for style in generate_lex_common.tValues(styleE, "flag"):
            if style == "foreignloan":
                continue
            if style not in OPTIONS["style"]:
                return False
    # No disallowed flag was found. Without this explicit return the
    # function would yield None (falsy) for acceptable words.
    return True
import voikkoutils
import xml.dom.minidom
import codecs

flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")


def frequency(word):
    """Return the frequency class of a word element as an integer.

    The value is int(generate_lex_common.tValue(...)) of the first "fclass"
    child element of *word*. When the word has no "fclass" element the
    default class 7 is returned.
    """
    fclass = word.getElementsByTagName("fclass")
    if not fclass:
        return 7
    return int(generate_lex_common.tValue(fclass[0]))


def check_style(word):
    """Check the style flags of the word according to current options.

    Every flag listed under the word's "style" elements must be present in
    OPTIONS["style"]; the flag "foreignloan" is always accepted.

    Returns True if the word is acceptable, otherwise returns False.
    """
    for styleE in word.getElementsByTagName("style"):
        for style in generate_lex_common.tValues(styleE, "flag"):
            if style == "foreignloan":
                continue
            if style not in OPTIONS["style"]:
                return False
    # No disallowed flag was found. Without this explicit return the
    # function would yield None (falsy) for acceptable words.
    return True
import sukija
import hfconv
import xml.dom.minidom
import codecs
import getopt
import locale

# Take the locale from the environment (empty string = user's default).
# Debugging aid: print locale.getlocale(locale.LC_ALL) to inspect the result.
locale.setlocale(locale.LC_ALL, '')

# First argument handed to every generate_lex_common.open_lex() call below.
path = SUKIJA_LEX

flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# The main lexicon is opened first; after that one lexicon is opened per
# entry of SPECIAL_VOCABULARY and stored under the entry's third field.
main_vocabulary = generate_lex_common.open_lex(path, "joukahainen.lex")
vocabulary_files = {}
for voc in generate_lex_common.SPECIAL_VOCABULARY:
    vocabulary_files[voc[2]] = generate_lex_common.open_lex(path, voc[2])

# The word list stays open after this section: only the lines up to and
# including the opening <wordlist> tag are consumed here.
listfile = open(generate_lex_common.VOCABULARY_DATA + u'/joukahainen.xml', 'r')

while True:
    line = listfile.readline()
    if line == '<wordlist xml:lang="fi">\n':
        break
    if line == '':
        # readline() returns '' only at end of file, so the opening tag
        # was never found.
        sys.stderr.write("Malformed file " + generate_lex_common.VOCABULARY_DATA +
                         "/joukahainen.xml\n")
        sys.exit(1)