示例#1
0
    def setUp(self):
        """Create a fresh factory holding the 'en', 'fr' and 'ja' profiles.

        Each profile is filled by adding every whitespace-separated token of
        the matching ``TRAINING_*`` attribute, then handed to
        ``self.factory.add_profile`` together with its zero-based position
        in the list below and the number of entries in that list.
        """
        self.factory = DetectorFactory()

        # Order matters: the position in this tuple is the second argument
        # passed to add_profile (0, 1, 2).
        corpora = (
            ('en', self.TRAINING_EN),
            ('fr', self.TRAINING_FR),
            ('ja', self.TRAINING_JA),
        )
        total = len(corpora)

        for position, (lang, corpus) in enumerate(corpora):
            profile = LangProfile(lang)
            for token in corpus.split():
                profile.add(token)
            self.factory.add_profile(profile, position, total)
from logging import getLogger
from pprint import pformat

from langdetect.detector_factory import DetectorFactory, PROFILES_DIRECTORY

# Logger named after this module.
logger = getLogger(__name__)

# One shared factory, populated at import time from PROFILES_DIRECTORY.
factory = DetectorFactory()
factory.load_profile(PROFILES_DIRECTORY)

# Languages reported by the factory, followed by the extra 'und' entry
# (presumably the "undetermined" language code -- confirm with callers).
lang_list = factory.get_lang_list() + ['und']

_pretty_lang_list = pformat(lang_list)
logger.debug("lang_list=\n{}".format(_pretty_lang_list))
示例#3
0
class DetectorTest(unittest.TestCase):
    """Tests for detectors created by a ``DetectorFactory``.

    ``setUp`` builds a factory from three tiny in-memory training corpora
    ('en', 'fr', 'ja'); the tests then check detection results and the
    language list the factory reports.
    """

    # Whitespace-separated training tokens, one corpus per language.
    TRAINING_EN = 'a a a b b c c d e'
    TRAINING_FR = 'a b b c c c d d d'
    TRAINING_JA = six.u('\u3042 \u3042 \u3042 \u3044 \u3046 \u3048 \u3048')
    # JSON-serialised profiles used by test_factory_from_json_string.
    JSON_LANG1 = '{"freq":{"A":3,"B":6,"C":3,"AB":2,"BC":1,"ABC":2,"BBC":1,"CBA":1},"n_words":[12,3,4],"name":"lang1"}'
    JSON_LANG2 = '{"freq":{"A":6,"B":3,"C":3,"AA":3,"AB":2,"ABC":1,"ABA":1,"CAA":1},"n_words":[12,5,3],"name":"lang2"}'

    def setUp(self):
        """Create a fresh factory holding the 'en', 'fr' and 'ja' profiles."""
        self.factory = DetectorFactory()

        # Order matters: the position in this tuple is the second argument
        # passed to add_profile (0, 1, 2).
        corpora = (
            ('en', self.TRAINING_EN),
            ('fr', self.TRAINING_FR),
            ('ja', self.TRAINING_JA),
        )
        total = len(corpora)

        for position, (lang, corpus) in enumerate(corpora):
            profile = LangProfile(lang)
            for token in corpus.split():
                profile.add(token)
            self.factory.add_profile(profile, position, total)

    def _detect(self, text):
        """Return the language detected for *text* by a new detector."""
        detector = self.factory.create()
        detector.append(text)
        return detector.detect()

    def test_detector1(self):
        """'a' is detected as 'en'."""
        self.assertEqual(self._detect('a'), 'en')

    def test_detector2(self):
        """'b d' is detected as 'fr'."""
        self.assertEqual(self._detect('b d'), 'fr')

    def test_detector3(self):
        """'d e' is detected as 'en'."""
        self.assertEqual(self._detect('d e'), 'en')

    def test_detector4(self):
        """Four U+3042 characters followed by 'a' are detected as 'ja'."""
        self.assertEqual(
            self._detect(six.u('\u3042\u3042\u3042\u3042a')), 'ja')

    def test_lang_list(self):
        """The factory lists the three profiles in registration order."""
        langs = self.factory.get_lang_list()
        self.assertEqual(len(langs), 3)
        for position, expected in enumerate(('en', 'fr', 'ja')):
            self.assertEqual(langs[position], expected)

    def test_factory_from_json_string(self):
        """After clear(), loading two JSON profiles yields exactly those two."""
        self.factory.clear()
        self.factory.load_json_profile([self.JSON_LANG1, self.JSON_LANG2])

        langs = self.factory.get_lang_list()
        self.assertEqual(len(langs), 2)
        for position, expected in enumerate(('lang1', 'lang2')):
            self.assertEqual(langs[position], expected)