def test_it_IT(self):
     """Check the exact result list for 'principianti' with it_IT loaded."""
     expected = [
         ('principianti', 0),
         ('principiati', -1),
         ('principiante', -1),
         ('principiarti', -1),
         ('principiasti', -1),
     ]
     checker = hunspell_suggest.Hunspell(['it_IT'])
     self.assertEqual(checker.suggest('principianti'), expected)
 def test_fi_FI_voikko(self):
     """Check fi_FI reports spellchecking and the results for 'kisssa'."""
     dictionary = hunspell_suggest.Dictionary('fi_FI')
     self.assertEqual(dictionary.has_spellchecking(), True)
     expected = [
         ('kissa', -1),
         ('kissaa', -1),
         ('kisassa', -1),
         ('kisussa', -1),
     ]
     checker = hunspell_suggest.Hunspell(['fi_FI'])
     self.assertEqual(checker.suggest('kisssa'), expected)
 def test_de_DE_cs_CZ_pyhunspell(self):
     """Check results when de_DE and cs_CZ are loaded together.

     The expected strings write accented letters as a base letter
     followed by a combining-mark escape, while some of the typed
     inputs are written with literal accented letters.
     """
     checker = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])

     # (typed input, text expected in the first result)
     german_cases = [
         ('Geschwindigkeitsubertre',
          'Geschwindigkeitsu\u0308bertretungsverfahren'),
         ('Geschwindigkeitsübertretungsverfahren',
          'Geschwindigkeitsu\u0308bertretungsverfahren'),
         ('Glühwürmchen', 'Glu\u0308hwu\u0308rmchen'),
         ('Alpengluhen', 'Alpenglu\u0308hen'),
     ]
     for typed, best in german_cases:
         self.assertEqual(checker.suggest(typed)[0], (best, 0))

     czech_best = ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0)
     # Only this input is compared against the complete result list.
     self.assertEqual(
         checker.suggest('filosofictejsi'),
         [czech_best, ('filosofie\u0300ti\u0300ji', -1)])
     for typed in ('filosofictejs', 'filosofičtější', 'filosofičtějš'):
         self.assertEqual(checker.suggest(typed)[0], czech_best)
 def test_sv_SE(self) -> None:
     """Check sv_SE results for several prefixes of 'östgot…'/'östgöt…'."""
     checker = hunspell_suggest.Hunspell(['sv_SE'])

     # (typed input, complete expected result list), checked in order
     full_list_cases = [
         ('östgo',
          [('östgot', 0),
           ('Östgöta', 0),
           ('östgöte', 0),
           ('östgotisk', 0),
           ('östgötsk', 0),
           ('östgötska', 0)]),
         ('östgot',
          [('östgot', 0),
           ('östgotisk', 0),
           ('Östgot', -1)]),
         ('östgö',
          [('Östgöta', 0),
           ('östgöte', 0),
           ('östgötsk', 0),
           ('östgötska', 0)]),
     ]
     for typed, expected in full_list_cases:
         self.assertEqual(checker.suggest(typed), expected)

     # For this input only the first five results are compared.
     first_five = [
         ('Östgöta', 0),
         ('östgöte', 0),
         ('östgötsk', 0),
         ('östgötska', 0),
         ('östgot', -1),
     ]
     self.assertEqual(checker.suggest('östgöt')[0:5], first_five)
 def test_es_ES(self):
     """Check es_ES results for 'teneis' and the first result for 'tenéis'."""
     checker = hunspell_suggest.Hunspell(['es_ES'])
     best = ('tene\u0301is', 0)
     expected = [
         best,
         ('tenes', -1),
         ('tenis', -1),
         ('teneos', -1),
         ('tienes', -1),
         ('te neis', -1),
         ('te-neis', -1),
     ]
     self.assertEqual(checker.suggest('teneis'), expected)
     self.assertEqual(checker.suggest('tenéis')[0], best)
 def test_fi_FI_dictionary_file(self):
     """Check fi_FI results for 'kissa' and 'Pariisin-suurlahettila'."""
     # dictionary file is included in ibus-typing-booster
     checker = hunspell_suggest.Hunspell(['fi_FI'])
     kissa_expected = [
         ('kissa', 0),
         ('kissaa', 0),
         ('kissani', 0),
         ('kissassa', 0),
         ('kissajuttu', 0),
         ('kissamaiseksi', 0),
     ]
     self.assertEqual(checker.suggest('kissa'), kissa_expected)
     ambassador_expected = [('Pariisin-suurla\u0308hettila\u0308s', 0)]
     self.assertEqual(
         checker.suggest('Pariisin-suurlahettila'), ambassador_expected)
 def test_en_US(self):
     """Check the exact en_US result list for 'camel'."""
     expected = [
         ('camel', 0),
         ('camellia', 0),
         ('camelhair', 0),
         ('came', -1),
         ('Camel', -1),
         ('cameo', -1),
         ('came l', -1),
         ('camels', -1),
     ]
     checker = hunspell_suggest.Hunspell(['en_US'])
     self.assertEqual(checker.suggest('camel'), expected)
 def test_fi_FI_dictionary_file(self) -> None:
     """Check the exact fi_FI result list for 'kissa'."""
     # dictionary file is included in ibus-typing-booster
     #
     # This should work with and without voikko
     expected = [
         ('kissa', 0),
         ('kissaa', 0),
         ('kissani', 0),
         ('kissassa', 0),
         ('kissajuttu', 0),
         ('kissamaiseksi', 0),
     ]
     checker = hunspell_suggest.Hunspell(['fi_FI'])
     self.assertEqual(checker.suggest('kissa'), expected)
 def test_fi_FI_voikko(self) -> None:
     """Check fi_FI reports spellchecking and two exact result lists."""
     dictionary = hunspell_suggest.Dictionary('fi_FI')
     self.assertEqual(dictionary.has_spellchecking(), True)

     checker = hunspell_suggest.Hunspell(['fi_FI'])
     kisssa_expected = [
         ('kissa', -1),
         ('Kiassa', -1),
         ('kissaa', -1),
         ('kisassa', -1),
         ('kisussa', -1),
     ]
     self.assertEqual(checker.suggest('kisssa'), kisssa_expected)

     ambassador_expected = [
         ('Pariisin-suurla\u0308hettila\u0308s', 0),
         ('Pariisin-suurlähetetila', -1),
         ('Pariisin-suurlähettiala', -1),
     ]
     self.assertEqual(
         checker.suggest('Pariisin-suurlähettila'), ambassador_expected)
 def test_en_US(self) -> None:
     """Check the exact en_US result list for 'camel'."""
     checker = hunspell_suggest.Hunspell(['en_US'])
     expected = [
         ('Camel', 0),
         ('camel', 0),
         ('Camelot', 0),
         ('camellia', 0),
         ('camelhair', 0),
         ('Camelopardalis', 0),
         ('CAM', -1),
         ('Cal', -1),
         ('Mel', -1),
         ('cal', -1),
         ('cam', -1),
         ('Carl', -1),
         ('Gael', -1),
         ('Jame', -1),
         ('call', -1),
         ('came', -1),
         ('come', -1),
         ('game', -1),
         ('Jamal', -1),
         ('Jamel', -1),
         ('Ocaml', -1),
         ('cable', -1),
         ('cameo', -1),
         ('calmer', -1),
         ('camels', -1),
         ('comely', -1),
         ('compel', -1),
         ('gamely', -1),
         ("Camel's", -1),
         ('Camilla', -1),
         ('Camille', -1),
         ('Carmela', -1),
         ('Carmelo', -1),
         ("Jamel's", -1),
         ("camel's", -1),
         ('caramel', -1),
         ('Carmella', -1),
     ]
     # The expected list is passed as the first argument here.
     self.assertEqual(expected, checker.suggest('camel'))
Пример #11
0
    def __init__(self, user_db_file=''):
        """Open the user phrase database, creating or replacing it if needed.

        :param user_db_file: Path of the sqlite3 database file, or
                             ``':memory:'``. When empty, the file
                             ``.local/share/ibus-typing-booster/user.db``
                             below the home directory is used.

        If the file already exists but has no description, a version
        different from ``USER_DATABASE_VERSION``, or an unexpected number
        of columns in the phrase table, the phrases are read with
        ``self.extract_user_phrases()``, the database files are renamed
        with a timestamp suffix, a new database is set up with
        ``self.init_user_db()`` and the recovered phrases are inserted
        into it.
        """
        global DEBUG_LEVEL
        try:
            DEBUG_LEVEL = int(os.getenv('IBUS_TYPING_BOOSTER_DEBUG_LEVEL'))
        except (TypeError, ValueError):
            DEBUG_LEVEL = int(0)
        if DEBUG_LEVEL > 1:
            LOGGER.debug(
                'TabSqliteDb.__init__(user_db_file = %s)', user_db_file)
        self.user_db_file = user_db_file
        if not self.user_db_file:
            home = os.getenv('HOME')
            if home is None:
                # path.join(None, ...) would raise TypeError when HOME
                # is not set in the environment.
                home = path.expanduser('~')
            self.user_db_file = path.join(
                home, '.local/share/ibus-typing-booster/user.db')
        if self.user_db_file != ':memory:':
            db_dir = os.path.dirname(self.user_db_file)
            # A bare file name has an empty dirname and os.makedirs('')
            # raises, so only create a directory when there is one.
            if db_dir and not os.path.isdir(db_dir):
                os.makedirs(db_dir)
        self._phrase_table_column_names = [
            'id',
            'input_phrase',
            'phrase',
            'p_phrase',
            'pp_phrase',
            'user_freq',
            'timestamp']

        self.old_phrases = []

        self.hunspell_obj = hunspell_suggest.Hunspell(())

        if self.user_db_file != ':memory:':
            if not os.path.exists(self.user_db_file):
                LOGGER.info(
                    'The user database %(udb)s does not exist yet.',
                    {'udb': self.user_db_file})
            else:
                try:
                    desc = self.get_database_desc(self.user_db_file)
                    expected_columns = len(self._phrase_table_column_names)
                    version_matches = (
                        desc is not None
                        and desc["version"] == USER_DATABASE_VERSION)
                    # The column count is only looked up when the version
                    # matches, and only once.
                    actual_columns = None
                    if version_matches:
                        actual_columns = (
                            self.get_number_of_columns_of_phrase_table(
                                self.user_db_file))
                    if (not version_matches
                            or actual_columns != expected_columns):
                        LOGGER.info(
                            'The user database %(udb)s seems incompatible',
                            {'udb': self.user_db_file})
                        if desc is None:
                            LOGGER.info(
                                'No version information in the database')
                        elif not version_matches:
                            LOGGER.info(
                                'The version of the database does not match '
                                '(too old or too new?)')
                            LOGGER.info(
                                'ibus-typing-booster wants version=%s',
                                USER_DATABASE_VERSION)
                            LOGGER.info(
                                'But the  database actually has version=%s',
                                desc["version"])
                        else:
                            LOGGER.info(
                                'The number of columns of the database '
                                'does not match')
                            LOGGER.info(
                                'ibus-typing-booster expects %(col)s columns',
                                {'col': expected_columns})
                            LOGGER.info(
                                'The database actually has %(col)s columns',
                                {'col': actual_columns})
                        LOGGER.info(
                            'Trying to recover the phrases from the old, '
                            'incompatible database')
                        self.old_phrases = self.extract_user_phrases()
                        timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
                        LOGGER.info(
                            'Renaming the incompatible database to "%(name)s"',
                            {'name': self.user_db_file+timestamp})
                        # Move the main file and its -shm/-wal companions.
                        for suffix in ('', '-shm', '-wal'):
                            old_name = self.user_db_file + suffix
                            if os.path.exists(old_name):
                                os.rename(old_name, old_name + timestamp)
                        LOGGER.info(
                            'Creating a new, empty database "%(name)s".',
                            {'name': self.user_db_file})
                        self.init_user_db()
                        LOGGER.info(
                            'If user phrases were successfully recovered '
                            'from the old, '
                            'incompatible database, they will be used to '
                            'initialize the new database.')
                    else:
                        LOGGER.info(
                            'Compatible database %(db)s found.',
                            {'db': self.user_db_file})
                except Exception:
                    LOGGER.exception(
                        'Unexpected error trying to find user database.')

        # Script run on every new connection; the file name is filled in
        # at the point of use.
        connection_script = '''
                PRAGMA encoding = "UTF-8";
                PRAGMA case_sensitive_like = true;
                PRAGMA page_size = 4096;
                PRAGMA cache_size = 20000;
                PRAGMA temp_store = MEMORY;
                PRAGMA journal_mode = WAL;
                PRAGMA journal_size_limit = 1000000;
                PRAGMA synchronous = NORMAL;
                ATTACH DATABASE "%s" AS user_db;
            '''

        # open user phrase database
        try:
            LOGGER.info(
                'Connect to the database %(name)s.',
                {'name': self.user_db_file})
            self.database = sqlite3.connect(self.user_db_file)
            self.database.executescript(
                connection_script % self.user_db_file)
        except Exception:
            LOGGER.exception(
                'Could not open the database %(name)s.',
                {'name': self.user_db_file})
            timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
            LOGGER.info(
                'Renaming the incompatible database to "%(name)s".',
                {'name': self.user_db_file+timestamp})
            for suffix in ('', '-shm', '-wal'):
                old_name = self.user_db_file + suffix
                if os.path.exists(old_name):
                    os.rename(old_name, old_name + timestamp)
            LOGGER.info(
                'Creating a new, empty database "%(name)s".',
                {'name': self.user_db_file})
            self.init_user_db()
            self.database = sqlite3.connect(self.user_db_file)
            self.database.executescript(
                connection_script % self.user_db_file)
        self.create_tables()
        if self.old_phrases:
            # Each recovered entry supplies the phrase (index 0) and its
            # user frequency (index 1); the phrase is also stored as the
            # input phrase and the context columns are left empty.
            sqlargs = [
                {'input_phrase': ophrase[0],
                 'phrase': ophrase[0],
                 'p_phrase': '',
                 'pp_phrase': '',
                 'user_freq': ophrase[1],
                 'timestamp': time.time()}
                for ophrase in self.old_phrases]
            sqlstr = '''
            INSERT INTO user_db.phrases (input_phrase, phrase, p_phrase, pp_phrase, user_freq, timestamp)
            VALUES (:input_phrase, :phrase, :p_phrase, :pp_phrase, :user_freq, :timestamp)
            ;'''
            try:
                self.database.executemany(sqlstr, sqlargs)
            except Exception:
                LOGGER.exception(
                    'Unexpected error inserting old phrases '
                    'into the user database.')
            self.database.commit()
            self.database.execute('PRAGMA wal_checkpoint;')

        # do not call this always on initialization for the moment.
        # It makes the already slow “python engine/main.py --xml”
        # to list the engines even slower and may break the listing
        # of the engines completely if there is a problem with
        # optimizing the databases. Probably bring this back as an
        # option later if the code in self.optimize_database() is
        # improved to do anything useful.
        #try:
        #    self.optimize_database()
        #except:
        #    print "exception in optimize_database()"
        #    traceback.print_exc ()

        # Finish by creating the indexes (without committing) and
        # generating the database description.
        self.create_indexes(commit=False)
        self.generate_userdb_desc()
    def __init__(self, config_filename='', user_db_file=''):
        """Open the user phrase database, creating or replacing it if needed.

        :param config_filename: Passed to ``ImeProperties``; the
                                ``language`` and ``hunspell_dict``
                                properties are read from the result.
        :param user_db_file: Path of the sqlite3 database file, or
                             ``':memory:'``. When empty, the file
                             ``.local/share/ibus-typing-booster/user.db``
                             below the home directory is used.

        If the file already exists but has no description, a version
        different from ``user_database_version``, or an unexpected number
        of columns in the phrase table, the phrases are read with
        ``self.extract_user_phrases()``, the database files are renamed
        with a timestamp suffix, a new database is set up with
        ``self.init_user_db()`` and the recovered phrases are inserted
        into it.
        """
        global DEBUG_LEVEL
        try:
            DEBUG_LEVEL = int(os.getenv('IBUS_TYPING_BOOSTER_DEBUG_LEVEL'))
        except (TypeError, ValueError):
            DEBUG_LEVEL = int(0)
        if DEBUG_LEVEL > 1:
            sys.stderr.write(
                "tabsqlitedb.__init__(config_filename = %s, user_db_file = %s)\n"
                % (config_filename, user_db_file))
        self.user_db_file = user_db_file
        if not self.user_db_file:
            home = os.getenv('HOME')
            if home is None:
                # path.join(None, ...) would raise TypeError when HOME
                # is not set in the environment.
                home = path.expanduser('~')
            self.user_db_file = path.join(
                home, '.local/share/ibus-typing-booster/user.db')
        if self.user_db_file != ':memory:':
            db_dir = os.path.dirname(self.user_db_file)
            # A bare file name has an empty dirname and os.makedirs('')
            # raises, so only create a directory when there is one.
            if db_dir and not os.path.isdir(db_dir):
                os.makedirs(db_dir)
        self._phrase_table_column_names = [
            'id', 'input_phrase', 'phrase', 'p_phrase', 'pp_phrase',
            'user_freq', 'timestamp'
        ]

        self.old_phrases = []

        self.ime_properties = ImeProperties(config_filename)
        self._language = self.ime_properties.get('language')
        self._normalization_form_internal = 'NFD'

        # "hunspell_dict" is a comma separated list; '.dic' is removed
        # from every entry.
        dictionary_names = [
            x.replace('.dic', '').strip()
            for x in self.ime_properties.get("hunspell_dict").split(',')
        ]
        self.hunspell_obj = hunspell_suggest.Hunspell(dictionary_names)

        if self.user_db_file != ':memory:':
            if not os.path.exists(self.user_db_file):
                sys.stderr.write(
                    "The user database %(udb)s does not exist yet.\n" %
                    {'udb': self.user_db_file})
            else:
                try:
                    desc = self.get_database_desc(self.user_db_file)
                    expected_columns = len(self._phrase_table_column_names)
                    version_matches = (
                        desc is not None
                        and desc["version"] == user_database_version)
                    # The column count is only looked up when the version
                    # matches, and only once.
                    actual_columns = None
                    if version_matches:
                        actual_columns = (
                            self.get_number_of_columns_of_phrase_table(
                                self.user_db_file))
                    if (not version_matches
                            or actual_columns != expected_columns):
                        sys.stderr.write(
                            "The user database %(udb)s "
                            "seems to be incompatible.\n" %
                            {'udb': self.user_db_file})
                        if desc is None:
                            sys.stderr.write(
                                "There is no version information in "
                                "the database.\n")
                        elif not version_matches:
                            sys.stderr.write(
                                "The version of the database does not match "
                                "(too old or too new?).\n")
                            sys.stderr.write(
                                "ibus-typing-booster wants version=%s\n" %
                                user_database_version)
                            sys.stderr.write(
                                "But the  database actually has version=%s\n" %
                                desc["version"])
                        else:
                            sys.stderr.write(
                                "The number of columns of the database "
                                "does not match.\n")
                            sys.stderr.write(
                                "ibus-typing-booster expects %(col)s columns.\n"
                                % {'col': expected_columns})
                            sys.stderr.write(
                                "But the database actually has "
                                "%(col)s columns.\n" %
                                {'col': actual_columns})
                        sys.stderr.write(
                            "Trying to recover the phrases from the old, "
                            "incompatible database.\n")
                        self.old_phrases = self.extract_user_phrases()
                        timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
                        sys.stderr.write(
                            'Renaming the incompatible database to '
                            '"%(name)s".\n' %
                            {'name': self.user_db_file + timestamp})
                        # Move the main file and its -shm/-wal companions.
                        for suffix in ('', '-shm', '-wal'):
                            old_name = self.user_db_file + suffix
                            if os.path.exists(old_name):
                                os.rename(old_name, old_name + timestamp)
                        sys.stderr.write(
                            "Creating a new, empty database \"%(name)s\".\n" %
                            {'name': self.user_db_file})
                        self.init_user_db()
                        sys.stderr.write(
                            "If user phrases were successfully recovered "
                            "from the old,\n"
                            "incompatible database, they will be used to "
                            "initialize the new database.\n")
                    else:
                        sys.stderr.write(
                            "Compatible database %(db)s found.\n" %
                            {'db': self.user_db_file})
                except Exception:
                    # Not a bare except: KeyboardInterrupt and SystemExit
                    # must not be swallowed here.
                    traceback.print_exc()

        # Statements executed, in this order, on every new connection.
        pragma_statements = (
            'PRAGMA encoding = "UTF-8";',
            'PRAGMA case_sensitive_like = true;',
            'PRAGMA page_size = 4096; ',
            'PRAGMA cache_size = 20000;',
            'PRAGMA temp_store = MEMORY;',
            'PRAGMA journal_mode = WAL;',
            'PRAGMA journal_size_limit = 1000000;',
            'PRAGMA synchronous = NORMAL;',
        )

        # open user phrase database
        try:
            sys.stderr.write("Connect to the database %(name)s.\n" %
                             {'name': self.user_db_file})
            self.db = sqlite3.connect(self.user_db_file)
            for statement in pragma_statements:
                self.db.execute(statement)
            self.db.execute('ATTACH DATABASE "%s" AS user_db;' %
                            self.user_db_file)
        except Exception:
            sys.stderr.write("Could not open the database %(name)s.\n" %
                             {'name': self.user_db_file})
            timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
            sys.stderr.write(
                "Renaming the incompatible database to \"%(name)s\".\n" %
                {'name': self.user_db_file + timestamp})
            for suffix in ('', '-shm', '-wal'):
                old_name = self.user_db_file + suffix
                if os.path.exists(old_name):
                    os.rename(old_name, old_name + timestamp)
            sys.stderr.write("Creating a new, empty database \"%(name)s\".\n" %
                             {'name': self.user_db_file})
            self.init_user_db()
            self.db = sqlite3.connect(self.user_db_file)
            for statement in pragma_statements:
                self.db.execute(statement)
            self.db.execute('ATTACH DATABASE "%s" AS user_db;' %
                            self.user_db_file)
        self.create_tables()
        if self.old_phrases:
            # Each recovered entry supplies the phrase (index 0) and its
            # user frequency (index 1); the phrase is also stored as the
            # input phrase and the context columns are left empty.
            sqlargs = [
                {
                    'input_phrase': old_phrase[0],
                    'phrase': old_phrase[0],
                    'p_phrase': '',
                    'pp_phrase': '',
                    'user_freq': old_phrase[1],
                    'timestamp': time.time()
                }
                for old_phrase in self.old_phrases
            ]
            sqlstr = '''
            INSERT INTO user_db.phrases (input_phrase, phrase, p_phrase, pp_phrase, user_freq, timestamp)
            VALUES (:input_phrase, :phrase, :p_phrase, :pp_phrase, :user_freq, :timestamp)
            ;'''
            try:
                self.db.executemany(sqlstr, sqlargs)
            except Exception:
                traceback.print_exc()
            self.db.commit()
            self.db.execute('PRAGMA wal_checkpoint;')

        # do not call this always on initialization for the moment.
        # It makes the already slow “python engine/main.py --xml”
        # to list the engines even slower and may break the listing
        # of the engines completely if there is a problem with
        # optimizing the databases. Probably bring this back as an
        # option later if the code in self.optimize_database() is
        # improved to do anything useful.
        #try:
        #    self.optimize_database()
        #except:
        #    print "exception in optimize_database()"
        #    traceback.print_exc ()

        # Finish by creating the indexes (without committing) and
        # generating the database description.
        self.create_indexes(commit=False)
        self.generate_userdb_desc()
 def test_el_GR(self):
     """Check the first el_GR result for 'αλφαβητο' typed without accent."""
     checker = hunspell_suggest.Hunspell(['el_GR'])
     first_result = checker.suggest('αλφαβητο')[0]
     self.assertEqual(first_result, ('αλφάβητο', 0))
 def test_fr_FR(self):
     """Check the exact fr_FR result list for 'differemmen'."""
     expected = [('diffe\u0301remment', 0)]
     checker = hunspell_suggest.Hunspell(['fr_FR'])
     self.assertEqual(checker.suggest('differemmen'), expected)
Пример #15
0
 def test_fi_FI(self):
     """Check the exact fi_FI result lists for 'kissa' and 'kisssa'."""
     checker = hunspell_suggest.Hunspell(['fi_FI'])
     self.assertEqual(checker.suggest('kissa'), [('kissa', 0)])
     misspelled_expected = [
         ('kissa', -1),
         ('kissaa', -1),
         ('kisassa', -1),
         ('kisussa', -1),
     ]
     self.assertEqual(checker.suggest('kisssa'), misspelled_expected)