def __init__(self):
    """Load the English FreeLing analysis chain and keep it on the instance.

    Creates, in this order: tokenizer, splitter, morphological analyzer
    (with a fixed selection of submodules), HMM tagger, sense annotator,
    UKB sense disambiguator and dependency parser. Every data file is
    read from ``<ipath>/share/freeling/<lang>/``.
    """
    freeling.util_init_locale("default")

    # Installation layout: language code, install prefix, language data dir.
    self.lang = "en"
    self.ipath = "/usr/local"
    self.lpath = self.ipath + "/share/freeling/" + self.lang + "/"
    data_dir = self.lpath

    self.tk = freeling.tokenizer(data_dir + "tokenizer.dat")
    self.sp = freeling.splitter(data_dir + "splitter.dat")

    # The option set comes from this class's own my_maco_options().
    self.morfo = freeling.maco(self.my_maco_options(self.lang, data_dir))

    # Submodule switches, in the positional order set_active_options takes.
    active_flags = (
        False,  # UserMap
        False,  # NumbersDetection
        True,   # PunctuationDetection
        False,  # DatesDetection
        True,   # DictionarySearch
        True,   # AffixAnalysis
        False,  # CompoundAnalysis
        True,   # RetokContractions
        False,  # MultiwordsDetection
        True,   # NERecognition
        False,  # QuantitiesDetection
        True,   # ProbabilityAssignment
    )
    self.morfo.set_active_options(*active_flags)

    # Part-of-speech tagger.
    self.tagger = freeling.hmm_tagger(data_dir + "tagger.dat", True, 2)
    # Sense annotator.
    self.sen = freeling.senses(data_dir + "senses.dat")
    # Sense disambiguator.
    self.wsd = freeling.ukb(data_dir + "ukb.dat")
    # Dependency parser.
    self.parser = freeling.dep_treeler(data_dir + "dep_treeler/dependences.dat")
def setup_freeling():
    """
    Build the FreeLing pre-processing components described in config.ini.

    The ``[FREELING]`` section of ``config.ini`` supplies ``Data`` (root of
    the FreeLing data files) and ``Lang`` (language sub-folder). The
    tokenizer and the user map are loaded from the language's ``twitter``
    sub-folder. The morphological analyzer options are created for "es"
    regardless of ``Lang``. Intended to be called from preprocess(); if
    FreeLing is installed in a non-standard place, adjust the paths in
    config.ini.

    :return: tuple ``(tokenizer, splitter, user map, morphological analyzer)``
    """
    config = configparser.ConfigParser()
    config.read('config.ini')

    pyfreeling.util_init_locale('default')

    section = config['FREELING']
    data_root = section['Data']
    lang_root = data_root + section['Lang']

    tokenizer = pyfreeling.tokenizer(lang_root + '/twitter/tokenizer.dat')
    splitter = pyfreeling.splitter(lang_root + '/splitter.dat')
    user_map = pyfreeling.RE_map(lang_root + '/twitter/usermap.dat')

    options = pyfreeling.maco_options("es")
    # Empty strings leave the corresponding data file unset.
    options.set_data_files(
        "",
        data_root + "common/punct.dat",
        lang_root + "/dicc.src",
        lang_root + "/afixos.dat",
        "",
        lang_root + "/locucions.dat",
        lang_root + "/np.dat",
        lang_root + "/quantities.dat",
        lang_root + "/probabilitats.dat",
    )

    analyzer = pyfreeling.maco(options)
    # Everything on except the 1st and 7th switches.
    analyzer.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    return tokenizer, splitter, user_map, analyzer
def load_modules():
    """Create the FreeLing analyzers and register them in FREELING_MODULES.

    Reads the module-level FREELING_DATA_DIR and FREELING_LANG settings and
    stores these entries in the module-level FREELING_MODULES mapping:

    * ``"tk"``     - tokenizer
    * ``"sp"``     - splitter
    * ``"sid"``    - session opened on the splitter
    * ``"mf"``     - morphological analyzer
    * ``"tg"``     - HMM tagger
    * ``"parser"`` - chart parser, only when the language ships a chunk
      grammar file

    Returns nothing; the result is the mutation of FREELING_MODULES.
    """
    freeling.util_init_locale("default")

    lang_dir = FREELING_DATA_DIR + FREELING_LANG

    options = freeling.maco_options(FREELING_LANG)
    # Empty strings leave the corresponding data file unset.
    options.set_data_files(
        "",
        FREELING_DATA_DIR + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    FREELING_MODULES["tk"] = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    FREELING_MODULES["sp"] = freeling.splitter(lang_dir + "/splitter.dat")
    FREELING_MODULES["sid"] = FREELING_MODULES["sp"].open_session()

    FREELING_MODULES["mf"] = freeling.maco(options)
    FREELING_MODULES["mf"].set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    FREELING_MODULES["tg"] = freeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)

    # The chunk grammar is optional. The guard must test the very file that
    # is about to be loaded: it previously checked a misspelled folder
    # ("chucker") with isdir(), which is never true for a .dat file, so the
    # parser was never registered.
    grammar_file = lang_dir + "/chunker/grammar-chunk.dat"
    if os.path.isfile(grammar_file):
        FREELING_MODULES["parser"] = freeling.chart_parser(grammar_file)
def load_freeling(valor):
    """Build the Portuguese FreeLing pipeline and bundle it for the caller.

    The language ("pt") and the installation prefix ("/usr/local") are
    fixed inside this function.

    :param valor: value passed for both the MultiwordsDetection and the
        NERecognition switches of the morphological analyzer.
    :return: the result of ``freeling_values(lpath, tk, sp, morfo, tagger)``.
    """
    # Switch to a UTF-8 compatible locale before loading anything.
    pyfreeling.util_init_locale("default")

    language = "pt"
    install_prefix = "/usr/local"
    # Folder holding the data files of the selected language.
    data_dir = install_prefix + "/share/freeling/" + language + "/"

    tokenizer = pyfreeling.tokenizer(data_dir + "tokenizer.dat")
    splitter = pyfreeling.splitter(data_dir + "splitter.dat")

    # Morphological analyzer with the option set from my_maco_options().
    analyzer = pyfreeling.maco(my_maco_options(language, data_dir))

    # Submodule switches, in the positional order set_active_options takes.
    switches = (
        False,  # UserMap
        True,   # NumbersDetection
        True,   # PunctuationDetection
        False,  # DatesDetection
        True,   # DictionarySearch
        True,   # AffixAnalysis
        False,  # CompoundAnalysis
        True,   # RetokContractions
        valor,  # MultiwordsDetection
        valor,  # NERecognition
        True,   # QuantitiesDetection
        True,   # ProbabilityAssignment
    )
    analyzer.set_active_options(*switches)

    pos_tagger = pyfreeling.hmm_tagger(data_dir + "tagger.dat", True, 2)

    return freeling_values(data_dir, tokenizer, splitter, analyzer, pos_tagger)
def __init__(self):
    """Set up every FreeLing component this object uses, for English.

    After construction the instance exposes ``tk`` (tokenizer), ``sp``
    (splitter), ``morfo`` (morphological analyzer), ``tagger`` (HMM
    tagger), ``sen`` (sense annotator), ``wsd`` (UKB sense disambiguator)
    and ``parser`` (dependency parser), plus the ``lang``, ``ipath`` and
    ``lpath`` settings they were loaded from.
    """
    freeling.util_init_locale("default")

    self.lang = "en"
    self.ipath = "/usr/local"
    self.lpath = self.ipath + "/share/freeling/" + self.lang + "/"

    self.tk = freeling.tokenizer(self.lpath + "tokenizer.dat")
    self.sp = freeling.splitter(self.lpath + "splitter.dat")

    # Analyzer options are supplied by my_maco_options() on this class.
    maco_opts = self.my_maco_options(self.lang, self.lpath)
    self.morfo = freeling.maco(maco_opts)

    # (submodule, enabled) pairs; the list order is the positional order
    # set_active_options takes.
    submodules = [
        ("UserMap", False),
        ("NumbersDetection", False),
        ("PunctuationDetection", True),
        ("DatesDetection", False),
        ("DictionarySearch", True),
        ("AffixAnalysis", True),
        ("CompoundAnalysis", False),
        ("RetokContractions", True),
        ("MultiwordsDetection", False),
        ("NERecognition", True),
        ("QuantitiesDetection", False),
        ("ProbabilityAssignment", True),
    ]
    self.morfo.set_active_options(*[enabled for _, enabled in submodules])

    # Tagger, sense annotator, sense disambiguator, dependency parser.
    self.tagger = freeling.hmm_tagger(self.lpath + "tagger.dat", True, 2)
    self.sen = freeling.senses(self.lpath + "senses.dat")
    self.wsd = freeling.ukb(self.lpath + "ukb.dat")
    self.parser = freeling.dep_treeler(self.lpath + "dep_treeler/dependences.dat")
# Language code: first command-line argument when given, English otherwise.
lang = sys.argv[1] if len(sys.argv) > 1 else "en"

# Installation prefix: second command-line argument when given,
# /home/flng otherwise.
ipath = sys.argv[2] if len(sys.argv) > 2 else "/home/flng"

# Folder holding the data files of the selected language.
lpath = ipath + "/share/freeling/" + lang + "/"

# Tokenizer and splitter.
tk = pyfreeling.tokenizer(lpath + "tokenizer.dat")
sp = pyfreeling.splitter(lpath + "splitter.dat")

# Morphological analyzer with the option set from my_maco_options().
morfo = pyfreeling.maco(my_maco_options(lang, lpath))

# Select which of the analyzer's submodules run.
morfo.set_active_options(
    False,  # UserMap
    True,   # NumbersDetection
    True,   # PunctuationDetection
    True,   # DatesDetection
    True,   # DictionarySearch
    True,   # AffixAnalysis
    False,  # CompoundAnalysis
    True,   # RetokContractions
    True,   # MultiwordsDetection
    True,   # NERecognition
    False,  # QuantitiesDetection
    True,   # ProbabilityAssignment
)
# Language identifier, loaded from the shared (language-independent) data.
la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")

# Option set for the morphological analyzer. The defaults are fine; only
# the data file locations need to be supplied.
LANG = "es"
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# Tokenizer, splitter (plus a splitter session) and morphological analyzer.
tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)

# Choose which of the created submodules are used in the next call.
# By default all created submodules are used.
mf.set_active_options(
    False,  # UserMap
    True,   # NumbersDetection
    True,   # PunctuationDetection
    True,   # DatesDetection
    True,   # DictionarySearch
    True,   # AffixAnalysis
    True,   # CompoundAnalysis
    True,   # RetokContractions
    False,  # MultiwordsDetection
    False,  # NERecognition
    True,   # QuantitiesDetection
    True,   # ProbabilityAssignment
)
DATA + LANG + "/quantities.dat", DATA + LANG + "/probabilitats.dat") # get installation path to use from arg2, or use /usr/local if not provided #ipath = "/usr/local/Cellar/freeling/4.0_4"; ipath = "/usr/local/" # path to language data lpath = ipath + "/share/freeling/" + lang + "/" # create analyzers tk = freeling.tokenizer(lpath + "tokenizer.dat") sp = freeling.splitter(lpath + "splitter.dat") sid = sp.open_session() # create the analyzer with the required set of maco_options morfo = freeling.maco(op) # then, (de)activate required modules morfo.set_active_options( False, # UserMap True, # NumbersDetection, True, # PunctuationDetection, True, # DatesDetection, True, # DictionarySearch, True, # AffixAnalysis, False, # CompoundAnalysis, True, # RetokContractions, True, # MultiwordsDetection, True, # NERecognition, False, # QuantitiesDetection, True) # ProbabilityAssignment
# Option set for the morphological analyzer: only the data file locations
# are supplied here.
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# Tokenizer, splitter (plus a splitter session) and morphological analyzer.
tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)

# Choose which of the created submodules are used in the next call.
# By default all created submodules are used.
mf.set_active_options(
    False, True, True, True,
    True, True, False, True,
    True, True, True, True,
)

# Tagger, sense annotator, chart parser and dependency parser. The
# dependency parser is given the chart parser's start symbol.
tg = pyfreeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
sen = pyfreeling.senses(DATA + LANG + "/senses.dat")
parser = pyfreeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
dep = pyfreeling.dep_txala(
    DATA + LANG + "/dep_txala/dependences.dat",
    parser.get_start_symbol(),
)

# Read the first line of input text.
lin = sys.stdin.readline()
def set_up_analyzer():
    """Create the FreeLing analyzers for Russian and return them.

    The FreeLing installation prefix is taken from the FREELINGDIR
    environment variable. When the variable is unset, it is set to a
    platform default (the Program Files folder on Windows, /usr/local
    elsewhere). Data files are then read from
    ``<FREELINGDIR>/share/freeling/``.

    Returns:
        Tuple ``(tk, sp, sid, mf, tg, sen)``: tokenizer, splitter, session
        opened on the splitter, morphological analyzer, HMM tagger and
        sense annotator.

    Raises:
        SystemExit: with status 1, after writing a message to stderr, when
            ``<FREELINGDIR>/share/freeling`` does not exist.
    """
    # Work out where the FreeLing data files live.
    if "FREELINGDIR" not in os.environ:
        # sys.platform reports "win32" on Windows, including 64-bit builds.
        if sys.platform.startswith("win"):
            os.environ["FREELINGDIR"] = "C:\\Program Files"
        else:
            os.environ["FREELINGDIR"] = "/usr/local"

    share_dir = os.environ["FREELINGDIR"] + "/share/freeling"
    if not os.path.exists(share_dir):
        # Say why we are giving up instead of exiting silently.
        sys.stderr.write(
            "Folder " + share_dir + " not found.\n"
            "Please set FREELINGDIR environment variable to FreeLing "
            "installation directory\n"
        )
        sys.exit(1)

    # Location of FreeLing configuration files.
    DATA = share_dir + "/"

    # Init locales.
    pyfreeling.util_init_locale("default")

    # Option set for the morphological analyzer. The defaults are fine;
    # only the data file locations need to be supplied.
    LANG = "ru"
    op = pyfreeling.maco_options(LANG)
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        DATA + LANG + "/dicc.src",
        DATA + LANG + "/afixos.dat",
        "",
        DATA + LANG + "/locucions.dat",
        DATA + LANG + "/np.dat",
        DATA + LANG + "/quantities.dat",
        DATA + LANG + "/probabilitats.dat",
    )

    # Tokenizer, splitter (plus a splitter session), morphological analyzer.
    tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
    sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
    sid = sp.open_session()
    mf = pyfreeling.maco(op)

    # Choose which of the created submodules are used in the next call.
    # By default all created submodules are used.
    mf.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    # Tagger and sense annotator. No chart parser or dependency parser is
    # created by this function.
    tg = pyfreeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
    sen = pyfreeling.senses(DATA + LANG + "/senses.dat")

    return (tk, sp, sid, mf, tg, sen)