Пример #1
0
 def testInitWithCorrectDictWorks(self):
     """Check that the dictionary variant given to Voikko changes spelling results.

     The word "amifostiini" must be rejected by a handle created with the
     "fi-x-standard" variant and accepted by one created with "fi-x-medicine".
     """
     # Terminate the handle currently stored on the test case before replacing it.
     self.voikko.terminate()
     self.voikko = Voikko(u"fi-x-standard")
     # assertFalse/assertTrue are used instead of the failIf/failUnless aliases,
     # which were deprecated in Python 3.1 and removed in Python 3.12.
     self.assertFalse(self.voikko.spell(u"amifostiini"))
     self.voikko.terminate()
     self.voikko = Voikko(u"fi-x-medicine")
     self.assertTrue(self.voikko.spell(u"amifostiini"))
Пример #2
0
def initVoikko():
    """Populate the module-level ``_voikko`` and ``_dictInfo`` mappings.

    For every tag in ``ALLOWED_DICTS`` a Voikko instance is created, configured
    with ``setIgnoreDot(False)`` and ``setAcceptUnfinishedParagraphsInGc(True)``
    and stored in ``_voikko`` under that tag. Afterwards every dictionary
    reported by ``Voikko.listDicts()`` whose "<language>-x-<variant>" tag is in
    ``ALLOWED_DICTS`` is stored in ``_dictInfo`` under that tag.
    """
    global _voikko

    for dictTag in ALLOWED_DICTS:
        handle = Voikko(dictTag)
        handle.setIgnoreDot(False)
        handle.setAcceptUnfinishedParagraphsInGc(True)
        # Register the handle only after it has been fully configured.
        _voikko[dictTag] = handle

    for dictionary in Voikko.listDicts():
        dictTag = u"-x-".join((dictionary.language, dictionary.variant))
        if dictTag not in ALLOWED_DICTS:
            continue
        _dictInfo[dictTag] = dictionary
 def __openHandleWithVariant(self, language, fullVariant):
     """Open a Voikko handle for ``fullVariant`` and register it under ``language``.

     The handle is created with the path returned by ``getDictionaryPath()``,
     stored in ``self.__handles[language]`` and then configured with every
     entry of the global boolean and integer option mappings.

     Returns the new handle. If a VoikkoException is raised at any point, its
     first argument is recorded in ``self.__initializationErrors[language]``
     and None is returned instead.
     """
     logging.debug("VoikkoHandlePool.__openHandleWithVariant")
     try:
         handle = Voikko(fullVariant, self.getDictionaryPath())
         # The handle is registered before the options are applied, so it
         # stays registered even if applying an option fails below.
         self.__handles[language] = handle
         for optionName, flag in self.__globalBooleanOptions.items():
             handle.setBooleanOption(optionName, flag)
         for optionName, number in self.__globalIntegerOptions.items():
             handle.setIntegerOption(optionName, number)
     except VoikkoException as error:
         self.__initializationErrors[language] = error.args[0]
         return None
     return handle
Пример #4
0
 def testAnotherObjectCanBeCreatedUsedAndDeletedInParallel(self):
     """Check that a second Voikko instance does not affect ``self.voikko``.

     A separate "fi-x-medicine" instance must accept "amifostiini" while the
     test case's own instance keeps rejecting it, both while the second
     instance exists and after it has been deleted.
     """
     medicalVoikko = Voikko(u"fi-x-medicine")
     # assertTrue/assertFalse are used instead of the failUnless/failIf aliases,
     # which were deprecated in Python 3.1 and removed in Python 3.12.
     self.assertTrue(medicalVoikko.spell(u"amifostiini"))
     self.assertFalse(self.voikko.spell(u"amifostiini"))
     del medicalVoikko
     # The original instance must behave the same after the other one is gone.
     self.assertFalse(self.voikko.spell(u"amifostiini"))
Пример #5
0
 def setUp(self):
     """Create a Voikko instance for the "fi" language tag and store it on ``self.voikko``."""
     handle = Voikko(u"fi")
     self.voikko = handle
Пример #6
0
 def testInitWithPathWorks(self):
     """Check that Voikko can be constructed with an explicit ``path`` argument.

     The handle is re-created with ``path=u"/path/to/nowhere"`` and must still
     accept the word "kissa".
     """
     # TODO: better test
     self.voikko.terminate()
     self.voikko = Voikko(u"fi", path=u"/path/to/nowhere")
     # assertTrue is used instead of the failUnless alias, which was
     # deprecated in Python 3.1 and removed in Python 3.12.
     self.assertTrue(self.voikko.spell(u"kissa"))
Пример #7
0
 def tryInit():
     """Construct Voikko with the tag "fi-x-non-existent-variant" and store it on ``self.voikko``.

     NOTE(review): presumably the constructor is expected to raise for this
     tag and the caller asserts on that; confirm against the enclosing test.
     """
     handle = Voikko(u"fi-x-non-existent-variant")
     self.voikko = handle
"""Contains functions for retrieving pre-processed words from one teletext frontpage image.

See instructions in words_from_image()
"""

import re
from typing import List, Tuple

import pytesseract
from PIL import Image, ImageOps
from libvoikko import Voikko

# NOTE: these settings only work in a Windows environment. Both the library
# search path given to Voikko and the Tesseract executable path are
# hard-coded absolute Windows paths and must be adjusted for other machines.
Voikko.setLibrarySearchPath("C:/python37/DLLs")
voikko = Voikko("fi-x-morphoid")
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'


def words_from_image(filename: str) -> List[List[str]]:
    """Retrieve pre-processed words from given 'filename' containing teletext frontpage image.

    Return value is a list of lists
    e.g. [['word1', 'word2'], ['word1', 'word2', 'word3']]
    """

    # make image black and white
    image = Image.open(filename).convert('1').convert('RGB')

    # invert black and white
    image = ImageOps.invert(image)
Пример #9
0
import re

from libvoikko import Voikko

# v = Voikko('fi-x-morphoid')
v = Voikko("fi")

# Matches any single non-word character, i.e. anything other than a letter,
# digit or underscore; used to replace such characters with a space
RE_WS_REPLACE = re.compile(r"[^\w]", re.UNICODE)
# Captures each run of word characters and "+" that is enclosed in parentheses
RE_FIND_COMPOUNDS = re.compile(r"\(([\w+]+)\)", re.UNICODE)


def voikko_analyze(text):
    text = RE_WS_REPLACE.sub(" ", text)
    words = text.split(" ")
    # Strip spaces
    words = [x.strip() for x in words]
    # Remove empty items
    words = filter(None, words)
    # Loop all words and analyze them
    analyzed = []
    for word in words:
        aword = v.analyze(word)
        if aword:
            i = 0
            for f in aword:
                i += 1
                f["found"] = True
                f["original"] = word
                wordbases = RE_FIND_COMPOUNDS.findall(f.get("WORDBASES", ""))
                f["wordbase_list"] = [
Пример #10
0
	def __init__(self, attributes, langtag="fi"):
		"""Store ``attributes``, create a Voikko instance for ``langtag`` and set up feature names.

		attributes -- stored unchanged on ``self.attributes``
		langtag -- language tag passed to the Voikko constructor (default "fi")
		"""
		# NOTE(review): this method has no ``input`` parameter, so unless the
		# module defines its own ``input`` this stores the ``input`` builtin.
		# Looks like a leftover; confirm intent before relying on ``self.input``.
		self.input = input
		self.attributes = attributes
		self.voikko = Voikko(langtag)
		self.__init_feature_names()
Пример #11
0
	def __init__(self, langtag="fi", binary=False, stop_word_classes=()):
		"""Create a Voikko instance for ``langtag`` and record the stop word classes.

		langtag -- language tag passed to the Voikko constructor (default "fi")
		binary -- forwarded to the parent class constructor as ``binary``
		stop_word_classes -- iterable of classes, stored as a set on
			``self.stop_word_classes``; defaults to no classes
		"""
		self.voikko = Voikko(langtag)
		# The default is an immutable empty tuple rather than a shared ``[]``;
		# the argument is only ever copied into a new set, never mutated.
		self.stop_word_classes = set(stop_word_classes)
		super().__init__(binary=binary)
Пример #12
0
# Switch to the Finnish locale so that strftime("%A") below produces the
# Finnish weekday name. Locale names follow "<language>_<TERRITORY>", so the
# Finnish locale is "fi_FI" (the previous "FI_fi" had the two parts swapped).
locale.setlocale(locale.LC_ALL, "fi_FI")
weekday = datetime.datetime.now().strftime("%A")  # e.g. Tiistai
# An explicit weekday given as the first command-line argument overrides today.
if len(sys.argv) > 1:
    weekday = sys.argv[1]

# Match any text node that contains the weekday name.
pattern = re.compile(".*{}.*".format(weekday))
URL = "http://pompier.fi/espa/lounas/"
text = get_html(URL)
soup = BeautifulSoup(text)
# columns = soup.find_all('strong')
todays_lunch = soup.find(text=pattern)
# find() returns None when nothing matches; stop with a readable message
# instead of failing with an AttributeError on the attribute access below.
if todays_lunch is None:
    sys.exit("No lunch entry matching {!r} found at {}".format(weekday, URL))
print(todays_lunch.parent.parent.text)

from libvoikko import Voikko, Token

v = Voikko(u"fi-x-morphoid")

# Turn hyphens and line breaks into spaces so the text can be split on " ".
ttt = todays_lunch.parent.parent.text
ttt = ttt.replace("-", " ")
ttt = ttt.replace("\r", " ")
ttt = ttt.replace("\n", " ")

# Collect one entry per token: the BASEFORM of the first analysis when there
# is one, otherwise the stripped token itself.
all_words = []
for word in ttt.split(" "):
    word = word.strip("\n\r,.")
    foo = v.analyze(word)
    print("-- " + word + "--")
    base = foo[0]["BASEFORM"] if foo and "BASEFORM" in foo[0] else word
    all_words.append(base)
    print(":  " + base)
Пример #13
0
from libvoikko import Voikko
# Module-level Voikko instance for the "fi" language tag, used by analyze_word.
voikko = Voikko("fi")

# from https://stackoverflow.com/a/1988826/95357


class Memoize:
    """Cache the results of a callable, keyed by its positional arguments.

    Instances are callable, so the class can be used as a decorator. The
    arguments are used as a dictionary key and therefore must be hashable;
    keyword arguments are not supported. The cache is never cleared.
    """

    def __init__(self, f):
        """Wrap ``f`` and start with an empty cache."""
        from functools import update_wrapper

        # Copy __name__, __doc__ and similar metadata from the wrapped
        # callable so the decorated object can still be introspected. This is
        # done first so the attributes assigned below always win.
        update_wrapper(self, f)
        self.f = f
        self.memo = {}

    def __call__(self, *args):
        """Return the cached result for ``args``, computing it on first use."""
        if args not in self.memo:
            self.memo[args] = self.f(*args)
        # Warning: You may wish to do a deepcopy here if returning objects
        return self.memo[args]


@Memoize
def analyze_word(form):
    """Return the result of ``voikko.analyze(form)``, cached per word form.

    Memoize hands back the same cached object on repeated calls, so callers
    should not mutate the returned value.
    """
    analyses = voikko.analyze(form)
    return analyses
Пример #14
0
from flask import Flask, request
from flask_restful import Resource, Api
from flask import jsonify

import sys
from libvoikko import Voikko

# Flask application and the flask_restful Api object bound to it.
app = Flask(__name__)
api = Api(app)

# Module-level Voikko instance ("fi" language tag) used by the resource classes.
v = Voikko('fi')


class Finnish_text_analysis(Resource):
    """Base resource that runs ``self.process`` on the ``word`` query parameter.

    ``process(word)`` is not defined here; subclasses are expected to provide it.
    """

    def get(self):
        """Handle GET: return the JSON-encoded result of ``process`` for ``word``."""
        # ``word`` is None when the query parameter is absent.
        result = self.process(request.args.get('word'))
        return jsonify(result)


class Analyze(Finnish_text_analysis):
    """Resource that returns the Voikko analysis of a word."""

    def process(self, word):
        """Return ``v.analyze(word)`` unchanged."""
        analyses = v.analyze(word)
        return analyses


class Spell(Finnish_text_analysis):
    """Resource that returns the Voikko spell-check result for a word."""

    def process(self, word):
        """Return a dict with the result of ``v.spell(word)`` under the key "spelling"."""
        verdict = v.spell(word)
        return {"spelling": verdict}


class Suggest(Finnish_text_analysis):
    def process(self, word):
Пример #15
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from libvoikko import Voikko

# Module metadata.
AUTHOR = 'Viljami Venekoski'
AUTHOR_EMAIL = "*****@*****.**"
VERSION = '0.1'
# Module-level Voikko instance for the "fi" language tag.
VOIKKO = Voikko("fi")