예제 #1
0
    def __init__(self):
        """Set the default vocalization options and build the analyzers.

        All options are stored as plain instance attributes. The lexical
        analyzer is created first, has cache use disallowed, and later
        receives the debug flag and the word limit.
        """
        # Maximum number of words to vocalize (1000 by default).
        self.limit = 1000

        # --- Options that are on by default ---------------------------
        # Each of these can be switched off for debugging purposes.

        # Put the last mark on vocalized words in the output.
        self.enabled_last_mark = True
        # Statistical vocalization based on collocations.
        self.enabled_stat_tashkeel = True
        # Syntactic analysis.
        self.enabled_syntaxic_analysis = True
        # Adjust the vocalization result for the meeting of two
        # unvowelled letters (iltiqa' as-sakinayn).
        self.enabled_ajust_vocalization = True
        # Semantic analysis.
        self.enabled_semantic_analysis = True
        # Allow the syntax-driven last mark (Harakat Al-I3rab).
        self.allow_syntax_last_mark = True

        # --- Options that are off by default --------------------------
        # Show the collocation marks; can be enabled for debugging.
        self.enabled_show_collocation_mark = False
        # Choose the tashkeel by scoring.
        self.select_by_score_enabled = False

        # Lexical analyzer, configured through a local alias; cache use
        # is disallowed before anything else is built.
        lexer = qalsadi.analex.Analex()
        lexer.disable_allow_cache_use()
        self.analyzer = lexer

        # Syntactic analyzer, then semantic analyzer.
        self.anasynt = aranasyn.anasyn.SyntaxAnalyzer()
        self.anasem = asmai.anasem.SemanticAnalyzer()

        # Hand the debug flag and the word limit to the lexical analyzer.
        # NOTE(review): `debug` is not defined in this method; presumably
        # a module-level flag -- confirm.
        lexer.set_debug(debug)
        lexer.set_limit(self.limit)

        # Collocations dictionary used for statistical tashkeel.
        show_marks = self.enabled_show_collocation_mark
        self.collo = coll.CollocationClass(show_marks)

        # Vocalizer for words that were not recognized.
        self.unknown_vocalizer = unknown_tashkeel.UnknownTashkeel()
예제 #2
0
    def __init__(self, mycache_path=False):
        """Set the default vocalization options and build the analyzers.

        Args:
            mycache_path: value forwarded as ``cache_path`` to the lexical,
                syntactic and semantic analyzers. Defaults to ``False``.
        """
        # Configure logging at INFO level and keep a logger for this module.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Maximum number of words to vocalize (1000 by default).
        self.limit = 1000

        # --- Options that are on by default ---------------------------
        # Each of these can be switched off for debugging purposes.

        # Put the last mark on vocalized words in the output.
        self.enabled_last_mark = True
        # Statistical vocalization based on collocations.
        self.enabled_stat_tashkeel = True
        # Syntactic analysis.
        self.enabled_syntaxic_analysis = True
        # Adjust the vocalization result for the meeting of two
        # unvowelled letters (iltiqa' as-sakinayn).
        self.enabled_ajust_vocalization = True
        # Semantic analysis.
        self.enabled_semantic_analysis = True
        # Allow the syntax-driven last mark (Harakat Al-I3rab).
        self.allow_syntax_last_mark = True

        # --- Options that are off by default --------------------------
        # Show the collocation marks; can be enabled for debugging.
        self.enabled_show_collocation_mark = False
        # Choose the tashkeel by scoring.
        self.select_by_score_enabled = False

        # The same cache path is handed to all three analyzers.
        cache_options = {"cache_path": mycache_path}

        # Lexical analyzer, configured through a local alias; cache use
        # is allowed here.
        lexer = qalsadi.analex.Analex(**cache_options)
        lexer.enable_allow_cache_use()
        self.analyzer = lexer

        # Syntactic analyzer.
        self.anasynt = aranasyn.anasyn.SyntaxAnalyzer(**cache_options)

        # Training is disabled while doing tashkeel.
        self.syntax_train_enabled = False

        # Semantic analyzer.
        self.anasem = asmai.anasem.SemanticAnalyzer(**cache_options)

        # Hand the debug flag and the word limit to the lexical analyzer.
        # NOTE(review): `debug` is not defined in this method; presumably
        # a module-level flag -- confirm.
        lexer.set_debug(debug)
        lexer.set_limit(self.limit)

        # Collocations dictionary used for statistical tashkeel.
        show_marks = self.enabled_show_collocation_mark
        self.collo = coll.CollocationClass(show_marks)

        # Vocalizer for words that were not recognized.
        self.unknown_vocalizer = unknown_tashkeel.UnknownTashkeel()