def __init__(self):
    """Initialise the vocalizer with default options and its analyzers.

    The constructor first stores the option flags as plain attributes,
    then builds the lexical, syntactic and semantic analyzers, the
    collocation dictionary and the fallback vocalizer for unknown words.
    """
    # ---- sizing -----------------------------------------------------
    # Upper bound on the number of words handed to the lexical analyzer.
    self.limit = 1000

    # ---- feature switches (mostly toggled for debugging) ------------
    # Put the final (case-ending) mark on vocalized output words.
    self.enabled_last_mark = True
    # Allow the last mark (harakat al-i'rab) chosen by syntax analysis.
    self.allow_syntax_last_mark = True
    # Collocation-based statistical vocalization.
    self.enabled_stat_tashkeel = True
    # Show collocation marks in the output; off unless debugging.
    self.enabled_show_collocation_mark = False
    # Choose between candidate vocalizations by score.
    self.select_by_score_enabled = False
    # Run the syntactic analysis stage.
    self.enabled_syntaxic_analysis = True
    # Run the semantic analysis stage.
    self.enabled_semantic_analysis = True
    # Adjust the vocalized result where two unvowelled consonants meet
    # (iltiqa' al-sakinayn).
    self.enabled_ajust_vocalization = True

    # ---- analysis components ----------------------------------------
    # Lexical analyzer; its cache is switched off here.
    lexical = qalsadi.analex.Analex()
    lexical.disable_allow_cache_use()
    self.analyzer = lexical

    # Syntactic and semantic analyzers.
    self.anasynt = aranasyn.anasyn.SyntaxAnalyzer()
    self.anasem = asmai.anasem.SemanticAnalyzer()

    # Tune the lexical analyzer once every analyzer exists.
    # NOTE(review): `debug` is not bound in this method; it presumably
    # refers to a module-level flag -- confirm it is defined.
    lexical.set_debug(debug)
    lexical.set_limit(self.limit)

    # Collocation dictionary backing the statistical vocalization.
    self.collo = coll.CollocationClass(self.enabled_show_collocation_mark)

    # Fallback vocalizer for words the analyzers do not recognise.
    self.unknown_vocalizer = unknown_tashkeel.UnknownTashkeel()
def __init__(self, mycache_path=False):
    """Initialise the vocalizer, its logger and its cache-aware analyzers.

    Args:
        mycache_path: value forwarded unchanged as ``cache_path`` to the
            lexical, syntactic and semantic analyzers. Defaults to
            ``False``.
    """
    # ---- logging ----------------------------------------------------
    # Configure logging at INFO level and keep a module-named logger.
    logging.basicConfig(level=logging.INFO)
    self.logger = logging.getLogger(__name__)

    # ---- sizing -----------------------------------------------------
    # Upper bound on the number of words handed to the lexical analyzer.
    self.limit = 1000

    # ---- feature switches (mostly toggled for debugging) ------------
    # Put the final (case-ending) mark on vocalized output words.
    self.enabled_last_mark = True
    # Allow the last mark (harakat al-i'rab) chosen by syntax analysis.
    self.allow_syntax_last_mark = True
    # Collocation-based statistical vocalization.
    self.enabled_stat_tashkeel = True
    # Show collocation marks in the output; off unless debugging.
    self.enabled_show_collocation_mark = False
    # Choose between candidate vocalizations by score.
    self.select_by_score_enabled = False
    # Run the syntactic analysis stage.
    self.enabled_syntaxic_analysis = True
    # Run the semantic analysis stage.
    self.enabled_semantic_analysis = True
    # Adjust the vocalized result where two unvowelled consonants meet
    # (iltiqa' al-sakinayn).
    self.enabled_ajust_vocalization = True
    # Keep syntax training disabled while vocalizing.
    self.syntax_train_enabled = False

    # ---- analysis components ----------------------------------------
    # Lexical analyzer; its cache is switched on here.
    lexical = qalsadi.analex.Analex(cache_path=mycache_path)
    lexical.enable_allow_cache_use()
    self.analyzer = lexical

    # Syntactic and semantic analyzers receive the same cache path.
    self.anasynt = aranasyn.anasyn.SyntaxAnalyzer(cache_path=mycache_path)
    self.anasem = asmai.anasem.SemanticAnalyzer(cache_path=mycache_path)

    # Tune the lexical analyzer once every analyzer exists.
    # NOTE(review): `debug` is not bound in this method; it presumably
    # refers to a module-level flag -- confirm it is defined.
    lexical.set_debug(debug)
    lexical.set_limit(self.limit)

    # Collocation dictionary backing the statistical vocalization.
    self.collo = coll.CollocationClass(self.enabled_show_collocation_mark)

    # Fallback vocalizer for words the analyzers do not recognise.
    self.unknown_vocalizer = unknown_tashkeel.UnknownTashkeel()