def test_it_IT(self):
    """Compare the suggestions for 'principianti' using the it_IT dictionary."""
    expected = [
        ('principianti', 0),
        ('principiati', -1),
        ('principiante', -1),
        ('principiarti', -1),
        ('principiasti', -1),
    ]
    checker = hunspell_suggest.Hunspell(['it_IT'])
    self.assertEqual(checker.suggest('principianti'), expected)
def test_fi_FI_voikko(self):
    """Check that fi_FI reports spellchecking and suggests for 'kisssa'."""
    dictionary = hunspell_suggest.Dictionary('fi_FI')
    self.assertEqual(dictionary.has_spellchecking(), True)

    expected = [
        ('kissa', -1),
        ('kissaa', -1),
        ('kisassa', -1),
        ('kisussa', -1),
    ]
    checker = hunspell_suggest.Hunspell(['fi_FI'])
    self.assertEqual(checker.suggest('kisssa'), expected)
def test_de_DE_cs_CZ_pyhunspell(self):
    """Check suggestions when de_DE and cs_CZ are loaded together.

    The expected strings are written with combining characters
    (escape sequences such as U+0308), while some of the inputs are
    typed with accents and some without.
    """
    checker = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])

    # Expected results which are asserted for more than one input.
    long_german_word = ('Geschwindigkeitsu\u0308bertretungsverfahren', 0)
    czech_word = ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0)

    first = checker.suggest('Geschwindigkeitsubertre')[0]
    self.assertEqual(first, long_german_word)
    first = checker.suggest('Geschwindigkeitsübertretungsverfahren')[0]
    self.assertEqual(first, long_german_word)
    first = checker.suggest('Glühwürmchen')[0]
    self.assertEqual(first, ('Glu\u0308hwu\u0308rmchen', 0))
    first = checker.suggest('Alpengluhen')[0]
    self.assertEqual(first, ('Alpenglu\u0308hen', 0))

    # For this input the complete list of suggestions is compared.
    everything = checker.suggest('filosofictejsi')
    self.assertEqual(
        everything,
        [czech_word, ('filosofie\u0300ti\u0300ji', -1)])

    first = checker.suggest('filosofictejs')[0]
    self.assertEqual(first, czech_word)
    first = checker.suggest('filosofičtější')[0]
    self.assertEqual(first, czech_word)
    first = checker.suggest('filosofičtějš')[0]
    self.assertEqual(first, czech_word)
def test_sv_SE(self) -> None:
    """Check suggestions for several 'östg…' prefixes with sv_SE."""
    checker = hunspell_suggest.Hunspell(['sv_SE'])

    # These four entries are expected for both 'östgö' and 'östgöt'.
    gota_entries = [
        ('Östgöta', 0),
        ('östgöte', 0),
        ('östgötsk', 0),
        ('östgötska', 0),
    ]

    expected = [
        ('östgot', 0),
        ('Östgöta', 0),
        ('östgöte', 0),
        ('östgotisk', 0),
        ('östgötsk', 0),
        ('östgötska', 0),
    ]
    self.assertEqual(checker.suggest('östgo'), expected)

    expected = [('östgot', 0), ('östgotisk', 0), ('Östgot', -1)]
    self.assertEqual(checker.suggest('östgot'), expected)

    self.assertEqual(checker.suggest('östgö'), list(gota_entries))

    # Only the first five suggestions are compared for this input.
    leading = checker.suggest('östgöt')[0:5]
    self.assertEqual(leading, gota_entries + [('östgot', -1)])
def test_es_ES(self):
    """Check suggestions for 'teneis' and 'tenéis' with es_ES."""
    checker = hunspell_suggest.Hunspell(['es_ES'])
    accented = ('tene\u0301is', 0)

    expected = [
        accented,
        ('tenes', -1),
        ('tenis', -1),
        ('teneos', -1),
        ('tienes', -1),
        ('te neis', -1),
        ('te-neis', -1),
    ]
    self.assertEqual(checker.suggest('teneis'), expected)

    # With the accent typed, only the first suggestion is compared.
    first = checker.suggest('tenéis')[0]
    self.assertEqual(first, accented)
def test_fi_FI_dictionary_file(self):
    """Check fi_FI suggestions for 'kissa' and 'Pariisin-suurlahettila'."""
    # dictionary file is included in ibus-typing-booster
    checker = hunspell_suggest.Hunspell(['fi_FI'])

    expected = [
        ('kissa', 0),
        ('kissaa', 0),
        ('kissani', 0),
        ('kissassa', 0),
        ('kissajuttu', 0),
        ('kissamaiseksi', 0),
    ]
    self.assertEqual(checker.suggest('kissa'), expected)

    expected = [('Pariisin-suurla\u0308hettila\u0308s', 0)]
    self.assertEqual(checker.suggest('Pariisin-suurlahettila'), expected)
def test_en_US(self):
    """Compare the full suggestion list for 'camel' with en_US."""
    expected = [
        ('camel', 0),
        ('camellia', 0),
        ('camelhair', 0),
        ('came', -1),
        ('Camel', -1),
        ('cameo', -1),
        ('came l', -1),
        ('camels', -1),
    ]
    checker = hunspell_suggest.Hunspell(['en_US'])
    self.assertEqual(checker.suggest('camel'), expected)
def test_fi_FI_dictionary_file(self) -> None:
    """Compare the full suggestion list for 'kissa' with fi_FI."""
    # dictionary file is included in ibus-typing-booster
    #
    # This should work with and without voikko
    expected = [
        ('kissa', 0),
        ('kissaa', 0),
        ('kissani', 0),
        ('kissassa', 0),
        ('kissajuttu', 0),
        ('kissamaiseksi', 0),
    ]
    checker = hunspell_suggest.Hunspell(['fi_FI'])
    self.assertEqual(checker.suggest('kissa'), expected)
def test_fi_FI_voikko(self) -> None:
    """Check that fi_FI reports spellchecking and compare two suggestion lists."""
    dictionary = hunspell_suggest.Dictionary('fi_FI')
    self.assertEqual(dictionary.has_spellchecking(), True)

    checker = hunspell_suggest.Hunspell(['fi_FI'])

    expected = [
        ('kissa', -1),
        ('Kiassa', -1),
        ('kissaa', -1),
        ('kisassa', -1),
        ('kisussa', -1),
    ]
    self.assertEqual(checker.suggest('kisssa'), expected)

    expected = [
        ('Pariisin-suurla\u0308hettila\u0308s', 0),
        ('Pariisin-suurlähetetila', -1),
        ('Pariisin-suurlähettiala', -1),
    ]
    self.assertEqual(checker.suggest('Pariisin-suurlähettila'), expected)
def test_en_US(self) -> None:
    """Compare the full suggestion list for 'camel' with en_US."""
    checker = hunspell_suggest.Hunspell(['en_US'])
    expected = [
        ('Camel', 0),
        ('camel', 0),
        ('Camelot', 0),
        ('camellia', 0),
        ('camelhair', 0),
        ('Camelopardalis', 0),
        ('CAM', -1),
        ('Cal', -1),
        ('Mel', -1),
        ('cal', -1),
        ('cam', -1),
        ('Carl', -1),
        ('Gael', -1),
        ('Jame', -1),
        ('call', -1),
        ('came', -1),
        ('come', -1),
        ('game', -1),
        ('Jamal', -1),
        ('Jamel', -1),
        ('Ocaml', -1),
        ('cable', -1),
        ('cameo', -1),
        ('calmer', -1),
        ('camels', -1),
        ('comely', -1),
        ('compel', -1),
        ('gamely', -1),
        ("Camel's", -1),
        ('Camilla', -1),
        ('Camille', -1),
        ('Carmela', -1),
        ('Carmelo', -1),
        ("Jamel's", -1),
        ("camel's", -1),
        ('caramel', -1),
        ('Carmella', -1),
    ]
    # The expected list is passed first, as in the original assertion.
    self.assertEqual(expected, checker.suggest('camel'))
def __init__(self, user_db_file=''):
    """Open the user phrase database, replacing it if it is incompatible.

    If ``user_db_file`` is empty, the database is placed at
    ``.local/share/ibus-typing-booster/user.db`` below the home
    directory.  For the special name ``':memory:'`` the directory
    creation and the compatibility check are skipped.

    An existing on-disk database is checked against
    ``USER_DATABASE_VERSION`` and the expected number of columns of
    the phrase table.  If it does not match, the phrases are
    extracted into ``self.old_phrases``, the database files are
    renamed with a timestamp suffix, a new database is initialized,
    and the recovered phrases are inserted into the new database.

    If connecting to (or configuring) the database fails, the files
    are renamed in the same way, a new database is initialized, and
    the connection is attempted once more; a failure of that second
    attempt propagates to the caller.

    :param user_db_file: Path of the database file, ``':memory:'``,
                         or an empty string for the default location.
    """
    global DEBUG_LEVEL
    try:
        DEBUG_LEVEL = int(os.getenv('IBUS_TYPING_BOOSTER_DEBUG_LEVEL'))
    except (TypeError, ValueError):
        DEBUG_LEVEL = int(0)
    if DEBUG_LEVEL > 1:
        LOGGER.debug(
            'TabSqliteDb.__init__(user_db_file = %s)', user_db_file)
    self.user_db_file = user_db_file
    if not self.user_db_file:
        home = os.getenv('HOME')
        if home is None:
            # Without HOME in the environment path.join(None, ...)
            # would raise a TypeError.
            home = path.expanduser('~')
        self.user_db_file = path.join(
            home, '.local/share/ibus-typing-booster/user.db')
    if self.user_db_file != ':memory:':
        db_directory = os.path.dirname(self.user_db_file)
        # A bare file name has an empty dirname and os.makedirs('')
        # raises, so only create a directory when there is one.
        if db_directory:
            os.makedirs(db_directory, exist_ok=True)
    self._phrase_table_column_names = [
        'id',
        'input_phrase',
        'phrase',
        'p_phrase',
        'pp_phrase',
        'user_freq',
        'timestamp']

    self.old_phrases = []

    self.hunspell_obj = hunspell_suggest.Hunspell(())

    if self.user_db_file != ':memory:':
        if not os.path.exists(self.user_db_file):
            LOGGER.info(
                'The user database %(udb)s does not exist yet.',
                {'udb': self.user_db_file})
        else:
            try:
                desc = self.get_database_desc(self.user_db_file)
                if (desc is None
                        or desc["version"] != USER_DATABASE_VERSION
                        or (self.get_number_of_columns_of_phrase_table(
                            self.user_db_file)
                            != len(self._phrase_table_column_names))):
                    LOGGER.info(
                        'The user database %(udb)s seems incompatible',
                        {'udb': self.user_db_file})
                    if desc is None:
                        LOGGER.info(
                            'No version information in the database')
                    elif desc["version"] != USER_DATABASE_VERSION:
                        LOGGER.info(
                            'The version of the database does not match '
                            '(too old or too new?)')
                        LOGGER.info(
                            'ibus-typing-booster wants version=%s',
                            USER_DATABASE_VERSION)
                        LOGGER.info(
                            'But the database actually has version=%s',
                            desc["version"])
                    elif (self.get_number_of_columns_of_phrase_table(
                            self.user_db_file)
                          != len(self._phrase_table_column_names)):
                        LOGGER.info(
                            'The number of columns of the database '
                            'does not match')
                        LOGGER.info(
                            'ibus-typing-booster expects %(col)s columns',
                            {'col': len(self._phrase_table_column_names)})
                        LOGGER.info(
                            'The database actually has %(col)s columns',
                            {'col':
                             self.get_number_of_columns_of_phrase_table(
                                 self.user_db_file)})
                    LOGGER.info(
                        'Trying to recover the phrases from the old, '
                        'incompatible database')
                    self.old_phrases = self.extract_user_phrases()
                    timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
                    LOGGER.info(
                        'Renaming the incompatible database to "%(name)s"',
                        {'name': self.user_db_file+timestamp})
                    self._rename_database_files(timestamp)
                    LOGGER.info(
                        'Creating a new, empty database "%(name)s".',
                        {'name': self.user_db_file})
                    self.init_user_db()
                    LOGGER.info(
                        'If user phrases were successfully recovered '
                        'from the old, '
                        'incompatible database, they will be used to '
                        'initialize the new database.')
                else:
                    LOGGER.info(
                        'Compatible database %(db)s found.',
                        {'db': self.user_db_file})
            except Exception:
                # Best effort: a failed check is logged and opening
                # the database is still attempted below.
                LOGGER.exception(
                    'Unexpected error trying to find user database.')

    # open user phrase database
    try:
        LOGGER.info(
            'Connect to the database %(name)s.',
            {'name': self.user_db_file})
        self.database = self._connect_user_database()
    except Exception:
        LOGGER.exception(
            'Could not open the database %(name)s.',
            {'name': self.user_db_file})
        timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
        LOGGER.info(
            'Renaming the incompatible database to "%(name)s".',
            {'name': self.user_db_file+timestamp})
        self._rename_database_files(timestamp)
        LOGGER.info(
            'Creating a new, empty database "%(name)s".',
            {'name': self.user_db_file})
        self.init_user_db()
        # A failure of this second attempt is not caught here.
        self.database = self._connect_user_database()
    self.create_tables()
    if self.old_phrases:
        # Recovered entries are indexed as (phrase, user_freq); the
        # phrase is used for both input_phrase and phrase and the
        # context columns are left empty.
        sqlargs = [
            {'input_phrase': ophrase[0],
             'phrase': ophrase[0],
             'p_phrase': '',
             'pp_phrase': '',
             'user_freq': ophrase[1],
             'timestamp': time.time()}
            for ophrase in self.old_phrases]
        sqlstr = '''
        INSERT INTO user_db.phrases
        (input_phrase, phrase, p_phrase, pp_phrase, user_freq, timestamp)
        VALUES (:input_phrase, :phrase, :p_phrase, :pp_phrase,
                :user_freq, :timestamp)
        ;'''
        try:
            self.database.executemany(sqlstr, sqlargs)
        except Exception:
            LOGGER.exception(
                'Unexpected error inserting old phrases '
                'into the user database.')
        self.database.commit()
        self.database.execute('PRAGMA wal_checkpoint;')

    # self.optimize_database() is deliberately not called on
    # initialization for the moment.  It makes the already slow
    # “python engine/main.py --xml” to list the engines even slower
    # and may break the listing of the engines completely if there
    # is a problem with optimizing the databases.  Probably bring
    # this back as an option later if the code in
    # self.optimize_database() is improved to do anything useful.

    # try create all hunspell-tables in user database
    self.create_indexes(commit=False)
    self.generate_userdb_desc()

def _rename_database_files(self, suffix):
    """Append ``suffix`` to the names of the database file and its
    ``-shm`` and ``-wal`` companion files, where they exist."""
    for extension in ('', '-shm', '-wal'):
        old_name = self.user_db_file + extension
        if os.path.exists(old_name):
            os.rename(old_name, old_name + suffix)

def _connect_user_database(self):
    """Connect to ``self.user_db_file``, apply the PRAGMA settings and
    attach the same file as ``user_db``; return the connection.

    The connection is closed again if configuring it fails, so that
    no handle stays open on files which are about to be renamed.
    """
    database = sqlite3.connect(self.user_db_file)
    try:
        database.executescript('''
            PRAGMA encoding = "UTF-8";
            PRAGMA case_sensitive_like = true;
            PRAGMA page_size = 4096;
            PRAGMA cache_size = 20000;
            PRAGMA temp_store = MEMORY;
            PRAGMA journal_mode = WAL;
            PRAGMA journal_size_limit = 1000000;
            PRAGMA synchronous = NORMAL;
        ''')
        # Bind the file name as a parameter instead of pasting it
        # into the statement: a path containing a double quote would
        # otherwise produce invalid SQL.
        database.execute(
            'ATTACH DATABASE ? AS user_db;', (self.user_db_file,))
    except Exception:
        database.close()
        raise
    return database
def __init__(self, config_filename='', user_db_file=''):
    """Read the engine configuration and open the user phrase database.

    ``config_filename`` is passed to ``ImeProperties``; its
    ``language`` and ``hunspell_dict`` properties are read, the latter
    as a comma separated list of dictionary names from which ``.dic``
    is removed.

    If ``user_db_file`` is empty, the database is placed at
    ``.local/share/ibus-typing-booster/user.db`` below the home
    directory.  For the special name ``':memory:'`` the directory
    creation and the compatibility check are skipped.

    An existing on-disk database is checked against
    ``user_database_version`` and the expected number of columns of
    the phrase table.  If it does not match, the phrases are
    extracted into ``self.old_phrases``, the database files are
    renamed with a timestamp suffix, a new database is initialized,
    and the recovered phrases are inserted into the new database.
    Progress is reported on ``sys.stderr``.

    :param config_filename: Name of the configuration file handed to
                            ``ImeProperties``.
    :param user_db_file: Path of the database file, ``':memory:'``,
                         or an empty string for the default location.
    """
    global DEBUG_LEVEL
    try:
        DEBUG_LEVEL = int(os.getenv('IBUS_TYPING_BOOSTER_DEBUG_LEVEL'))
    except (TypeError, ValueError):
        DEBUG_LEVEL = int(0)
    if DEBUG_LEVEL > 1:
        sys.stderr.write(
            "tabsqlitedb.__init__(config_filename = %s, user_db_file = %s)\n"
            % (config_filename, user_db_file))
    self.user_db_file = user_db_file
    if not self.user_db_file:
        home = os.getenv('HOME')
        if home is None:
            # Without HOME in the environment path.join(None, ...)
            # would raise a TypeError.
            home = path.expanduser('~')
        self.user_db_file = path.join(
            home, '.local/share/ibus-typing-booster/user.db')
    if self.user_db_file != ':memory:':
        db_directory = os.path.dirname(self.user_db_file)
        # A bare file name has an empty dirname and os.makedirs('')
        # raises, so only create a directory when there is one.
        if db_directory:
            os.makedirs(db_directory, exist_ok=True)
    self._phrase_table_column_names = [
        'id', 'input_phrase', 'phrase', 'p_phrase', 'pp_phrase',
        'user_freq', 'timestamp'
    ]

    self.old_phrases = []

    self.ime_properties = ImeProperties(config_filename)
    self._language = self.ime_properties.get('language')
    self._normalization_form_internal = 'NFD'

    dictionary_names = [
        x.replace('.dic', '').strip()
        for x in self.ime_properties.get("hunspell_dict").split(',')
    ]
    self.hunspell_obj = hunspell_suggest.Hunspell(dictionary_names)

    # Name suffixes of the database file and its companion files.
    db_file_extensions = ('', '-shm', '-wal')
    # Settings applied to every new connection, in this order.
    pragmas = (
        'PRAGMA encoding = "UTF-8";',
        'PRAGMA case_sensitive_like = true;',
        'PRAGMA page_size = 4096; ',
        'PRAGMA cache_size = 20000;',
        'PRAGMA temp_store = MEMORY;',
        'PRAGMA journal_mode = WAL;',
        'PRAGMA journal_size_limit = 1000000;',
        'PRAGMA synchronous = NORMAL;',
    )
    # The file name is bound as a parameter instead of being pasted
    # into the statement: a path containing a double quote would
    # otherwise produce invalid SQL.
    attach_statement = 'ATTACH DATABASE ? AS user_db;'

    if self.user_db_file != ':memory:':
        if not os.path.exists(self.user_db_file):
            sys.stderr.write(
                "The user database %(udb)s does not exist yet.\n"
                % {'udb': self.user_db_file})
        else:
            try:
                desc = self.get_database_desc(self.user_db_file)
                if (desc is None
                        or desc["version"] != user_database_version
                        or (self.get_number_of_columns_of_phrase_table(
                            self.user_db_file)
                            != len(self._phrase_table_column_names))):
                    sys.stderr.write(
                        "The user database %(udb)s "
                        % {'udb': self.user_db_file}
                        + "seems to be incompatible.\n")
                    if desc is None:
                        sys.stderr.write(
                            "There is no version information in "
                            + "the database.\n")
                    elif desc["version"] != user_database_version:
                        sys.stderr.write(
                            "The version of the database does not match "
                            + "(too old or too new?).\n")
                        sys.stderr.write(
                            "ibus-typing-booster wants version=%s\n"
                            % user_database_version)
                        sys.stderr.write(
                            "But the database actually has version=%s\n"
                            % desc["version"])
                    elif (self.get_number_of_columns_of_phrase_table(
                            self.user_db_file)
                          != len(self._phrase_table_column_names)):
                        sys.stderr.write(
                            "The number of columns of the database "
                            + "does not match.\n")
                        sys.stderr.write(
                            "ibus-typing-booster expects %(col)s columns.\n"
                            % {'col': len(self._phrase_table_column_names)})
                        sys.stderr.write(
                            "But the database actually has "
                            + "%(col)s columns.\n" % {
                                'col':
                                self.get_number_of_columns_of_phrase_table(
                                    self.user_db_file)
                            })
                    sys.stderr.write(
                        "Trying to recover the phrases from the old, "
                        + "incompatible database.\n")
                    self.old_phrases = self.extract_user_phrases()
                    timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
                    sys.stderr.write(
                        'Renaming the incompatible database to '
                        + '"%(name)s".\n'
                        % {'name': self.user_db_file + timestamp})
                    for extension in db_file_extensions:
                        old_name = self.user_db_file + extension
                        if os.path.exists(old_name):
                            os.rename(old_name, old_name + timestamp)
                    sys.stderr.write(
                        "Creating a new, empty database \"%(name)s\".\n"
                        % {'name': self.user_db_file})
                    self.init_user_db()
                    sys.stderr.write(
                        "If user phrases were successfully recovered "
                        + "from the old,\n"
                        + "incompatible database, they will be used to "
                        + "initialize the new database.\n")
                else:
                    sys.stderr.write(
                        "Compatible database %(db)s found.\n"
                        % {'db': self.user_db_file})
            except Exception:
                # Best effort: a failed check is reported and opening
                # the database is still attempted below.  Only
                # Exception is caught so that KeyboardInterrupt and
                # SystemExit are not swallowed.
                traceback.print_exc()

    # open user phrase database
    self.db = None
    try:
        sys.stderr.write("Connect to the database %(name)s.\n"
                         % {'name': self.user_db_file})
        self.db = sqlite3.connect(self.user_db_file)
        for pragma in pragmas:
            self.db.execute(pragma)
        self.db.execute(attach_statement, (self.user_db_file,))
    except Exception:
        sys.stderr.write("Could not open the database %(name)s.\n"
                         % {'name': self.user_db_file})
        # Report the cause as well, not only the fact of the failure.
        traceback.print_exc()
        if self.db is not None:
            # Do not keep a handle open on files about to be renamed.
            self.db.close()
            self.db = None
        timestamp = time.strftime('-%Y-%m-%d_%H:%M:%S')
        sys.stderr.write(
            "Renaming the incompatible database to \"%(name)s\".\n"
            % {'name': self.user_db_file + timestamp})
        for extension in db_file_extensions:
            old_name = self.user_db_file + extension
            if os.path.exists(old_name):
                os.rename(old_name, old_name + timestamp)
        sys.stderr.write("Creating a new, empty database \"%(name)s\".\n"
                         % {'name': self.user_db_file})
        self.init_user_db()
        # A failure of this second attempt is not caught here.
        self.db = sqlite3.connect(self.user_db_file)
        for pragma in pragmas:
            self.db.execute(pragma)
        self.db.execute(attach_statement, (self.user_db_file,))
    self.create_tables()
    if self.old_phrases:
        # Recovered entries are indexed as (phrase, user_freq); the
        # phrase is used for both input_phrase and phrase and the
        # context columns are left empty.
        sqlargs = [
            {
                'input_phrase': old_phrase[0],
                'phrase': old_phrase[0],
                'p_phrase': '',
                'pp_phrase': '',
                'user_freq': old_phrase[1],
                'timestamp': time.time()
            }
            for old_phrase in self.old_phrases
        ]
        sqlstr = '''
        INSERT INTO user_db.phrases
        (input_phrase, phrase, p_phrase, pp_phrase, user_freq, timestamp)
        VALUES (:input_phrase, :phrase, :p_phrase, :pp_phrase,
                :user_freq, :timestamp)
        ;'''
        try:
            self.db.executemany(sqlstr, sqlargs)
        except Exception:
            traceback.print_exc()
        self.db.commit()
        self.db.execute('PRAGMA wal_checkpoint;')

    # self.optimize_database() is deliberately not called on
    # initialization for the moment.  It makes the already slow
    # “python engine/main.py --xml” to list the engines even slower
    # and may break the listing of the engines completely if there
    # is a problem with optimizing the databases.  Probably bring
    # this back as an option later if the code in
    # self.optimize_database() is improved to do anything useful.

    # try create all hunspell-tables in user database
    self.create_indexes(commit=False)
    self.generate_userdb_desc()
def test_el_GR(self):
    """Check the first suggestion for 'αλφαβητο' with el_GR."""
    checker = hunspell_suggest.Hunspell(['el_GR'])
    first = checker.suggest('αλφαβητο')[0]
    self.assertEqual(first, ('αλφάβητο', 0))
def test_fr_FR(self):
    """Compare the full suggestion list for 'differemmen' with fr_FR."""
    expected = [('diffe\u0301remment', 0)]
    checker = hunspell_suggest.Hunspell(['fr_FR'])
    self.assertEqual(checker.suggest('differemmen'), expected)
def test_fi_FI(self):
    """Compare the suggestion lists for 'kissa' and 'kisssa' with fi_FI."""
    checker = hunspell_suggest.Hunspell(['fi_FI'])

    self.assertEqual(checker.suggest('kissa'), [('kissa', 0)])

    expected = [
        ('kissa', -1),
        ('kissaa', -1),
        ('kisassa', -1),
        ('kisussa', -1),
    ]
    self.assertEqual(checker.suggest('kisssa'), expected)