Exemplo n.º 1
0
import sukija
import hfconv
import xml.dom.minidom
import codecs
import getopt

import locale
# Debugging aid (disabled): show the locale in effect before it is changed.
#print locale.getlocale(locale.LC_ALL)
# Switch every locale category to the user's default settings, as taken
# from the environment (that is what the empty string requests).
locale.setlocale(locale.LC_ALL, '')

# First argument of every open_lex() call below. SUKIJA_LEX is defined
# outside this excerpt -- presumably the lexicon output directory; TODO confirm.
path = SUKIJA_LEX

# Flag attributes read from flags.txt under generate_lex_common.VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# Handle for the main lexicon, "joukahainen.lex".
main_vocabulary = generate_lex_common.open_lex(path, "joukahainen.lex")
# One extra open_lex() handle per SPECIAL_VOCABULARY entry. voc[2] is used
# both as the dictionary key and as the second open_lex() argument, i.e. in
# the same position as "joukahainen.lex" above (presumably a file name).
vocabulary_files = {}
for voc in generate_lex_common.SPECIAL_VOCABULARY:
    vocabulary_files[voc[2]] = generate_lex_common.open_lex(path, voc[2])

# Open the Joukahainen XML word list for reading. The handle stays open
# after this excerpt; nothing visible here closes it.
listfile = open(generate_lex_common.VOCABULARY_DATA + u'/joukahainen.xml', 'r')

# Consume lines up to and including the opening <wordlist> tag.
# The comparison is exact: the tag must fill the whole line and be followed
# by a single "\n", otherwise the line is skipped like any other.
line = ""
while line != '<wordlist xml:lang="fi">\n':
    line = listfile.readline()
    # readline() returns '' only at end of file, which means the opening
    # tag was never found: report the problem and exit with status 1.
    if line == '':
        sys.stderr.write("Malformed file " + generate_lex_common.VOCABULARY_DATA + \
                         "/joukahainen.xml\n")
        sys.exit(1)

# Counter initialised to zero; it is not updated within this excerpt
# (presumably a word counter for the code that follows -- TODO confirm).
wcount = 0
Exemplo n.º 2
0
import xml.dom.minidom
import codecs
from string import rfind

# Flag attributes read from flags.txt under generate_lex_common.VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# Get command line options. The result is indexed by option name below
# ("destdir" here, "style" in check_style()).
OPTIONS = generate_lex_common.get_options()

# Inflection class map, built by hfconv.compileClassmapREs() from
# hfconv.modern_classmap.
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko.
# This rebinds the attribute on the shared generate_lex_common module, so any
# later code that reads SPECIAL_VOCABULARY from that module sees an empty list.
generate_lex_common.SPECIAL_VOCABULARY = []

# Handle for the main lexicon, "joukahainen.lex", opened with the "destdir"
# option as the first open_lex() argument.
main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")

def frequency(word):
	"""Return the integer value of the first "fclass" element of *word*.

	*word* must provide ``getElementsByTagName`` (for example an
	``xml.dom.minidom`` element). When it contains no "fclass" element,
	the default value 7 is returned.
	"""
	fclass_nodes = word.getElementsByTagName("fclass")
	if not fclass_nodes:
		# No "fclass" element present: fall back to the default of 7.
		return 7
	first_fclass = fclass_nodes[0]
	return int(generate_lex_common.tValue(first_fclass))

# Check the style flags of the word according to current options.
# Intended contract: returns True if the word is acceptable, otherwise
# returns False.
# Walks every "style" element of `word` and each item that
# generate_lex_common.tValues(styleE, "flag") returns for it. The
# "foreignloan" flag is always tolerated; any other flag that is missing
# from OPTIONS["style"] makes the function return False immediately.
# NOTE(review): this excerpt ends without an explicit `return True`, so the
# visible code falls through (returning None) for acceptable words. The
# function looks cut off here -- confirm against the full file.
def check_style(word):
	# OPTIONS is only read here, so this declaration is not strictly required.
	global OPTIONS
	for styleE in word.getElementsByTagName("style"):
		for style in generate_lex_common.tValues(styleE, "flag"):
			if style == "foreignloan":
				# Never a reason to reject a word.
				continue
			# Reject the word as soon as one of its flags is not enabled.
			if not style in OPTIONS["style"]: return False
Exemplo n.º 3
0
import voikkoutils
import xml.dom.minidom
import codecs

# Flag attributes read from flags.txt under generate_lex_common.VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options. The result is indexed by option name below
# ("destdir" here, "style" in check_style()).
OPTIONS = generate_lex_common.get_options()

# Inflection class map, built by hfconv.compileClassmapREs() from
# hfconv.modern_classmap.
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko.
# This rebinds the attribute on the shared generate_lex_common module, so any
# later code that reads SPECIAL_VOCABULARY from that module sees an empty list.
generate_lex_common.SPECIAL_VOCABULARY = []

# Handle for the main lexicon, "joukahainen.lex", opened with the "destdir"
# option as the first open_lex() argument.
main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")

def frequency(word):
	"""Look up the "fclass" value of *word* and return it as an int.

	Only the first "fclass" element found through
	``word.getElementsByTagName`` is considered. A word without any
	"fclass" element gets the default value 7.
	"""
	matches = word.getElementsByTagName("fclass")
	if matches:
		return int(generate_lex_common.tValue(matches[0]))
	# Nothing found: use the default value.
	return 7

# Check the style flags of the word according to current options.
# Intended contract: returns True if the word is acceptable, otherwise
# returns False.
# Walks every "style" element of `word` and each item that
# generate_lex_common.tValues(styleE, "flag") returns for it. The
# "foreignloan" flag is always tolerated; any other flag that is missing
# from OPTIONS["style"] makes the function return False immediately.
# NOTE(review): this excerpt ends without an explicit `return True`, so the
# visible code falls through (returning None) for acceptable words. The
# function looks cut off here -- confirm against the full file.
def check_style(word):
	# OPTIONS is only read here, so this declaration is not strictly required.
	global OPTIONS
	for styleE in word.getElementsByTagName("style"):
		for style in generate_lex_common.tValues(styleE, "flag"):
			if style == "foreignloan":
				# Never a reason to reject a word.
				continue
			# Reject the word as soon as one of its flags is not enabled.
			if not style in OPTIONS["style"]: return False
Exemplo n.º 4
0
import sukija
import hfconv
import xml.dom.minidom
import codecs
import getopt

import locale
# Debugging aid (disabled): show the locale in effect before it is changed.
#print locale.getlocale(locale.LC_ALL)
# Switch every locale category to the user's default settings, as taken
# from the environment (that is what the empty string requests).
locale.setlocale(locale.LC_ALL, '')


# First argument of every open_lex() call below. SUKIJA_LEX is defined
# outside this excerpt -- presumably the lexicon output directory; TODO confirm.
path = SUKIJA_LEX

# Flag attributes read from flags.txt under generate_lex_common.VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + u"/flags.txt")

# Handle for the main lexicon, "joukahainen.lex".
main_vocabulary = generate_lex_common.open_lex(path,"joukahainen.lex")
# One extra open_lex() handle per SPECIAL_VOCABULARY entry. voc[2] is used
# both as the dictionary key and as the second open_lex() argument, i.e. in
# the same position as "joukahainen.lex" above (presumably a file name).
vocabulary_files = {}
for voc in generate_lex_common.SPECIAL_VOCABULARY:
	vocabulary_files[voc[2]] = generate_lex_common.open_lex(path,voc[2])


# Open the Joukahainen XML word list for reading. The handle stays open
# after this excerpt; nothing visible here closes it.
listfile = open(generate_lex_common.VOCABULARY_DATA + u'/joukahainen.xml', 'r')

# Consume lines up to and including the opening <wordlist> tag.
# The comparison is exact: the tag must fill the whole line and be followed
# by a single "\n", otherwise the line is skipped like any other.
line = ""
while line != '<wordlist xml:lang="fi">\n':
	line = listfile.readline()
	# readline() returns '' only at end of file, which means the opening
	# tag was never found: report the problem and exit with status 1.
	if line == '':
		sys.stderr.write("Malformed file " + generate_lex_common.VOCABULARY_DATA + \
		                 "/joukahainen.xml\n")
		sys.exit(1)