Exemplo n.º 1
0
    def __init__(self):
        """Create the FreeLing processing stages and store them on the instance.

        Stages created, in order: tokenizer, splitter, morphological
        analyzer, HMM tagger, sense annotator, UKB sense disambiguator and
        dependency parser. Every data file is read from
        ``<ipath>/share/freeling/<lang>/``.
        """
        freeling.util_init_locale("default")

        # Fixed configuration: language code "en", installation prefix /usr/local.
        self.lang = "en"
        self.ipath = "/usr/local"
        self.lpath = "%s/share/freeling/%s/" % (self.ipath, self.lang)

        self.tk = freeling.tokenizer(self.lpath + "tokenizer.dat")
        self.sp = freeling.splitter(self.lpath + "splitter.dat")

        # The analyzer is built from the options returned by my_maco_options;
        # the flags below then switch its individual submodules on or off.
        analyzer_options = self.my_maco_options(self.lang, self.lpath)
        self.morfo = freeling.maco(analyzer_options)
        submodule_flags = (
            False,  # UserMap
            False,  # NumbersDetection
            True,   # PunctuationDetection
            False,  # DatesDetection
            True,   # DictionarySearch
            True,   # AffixAnalysis
            False,  # CompoundAnalysis
            True,   # RetokContractions
            False,  # MultiwordsDetection
            True,   # NERecognition
            False,  # QuantitiesDetection
            True,   # ProbabilityAssignment
        )
        self.morfo.set_active_options(*submodule_flags)

        # Part-of-speech tagger.
        self.tagger = freeling.hmm_tagger(self.lpath + "tagger.dat", True, 2)

        # Sense annotator, sense disambiguator and dependency parser.
        self.sen = freeling.senses(self.lpath + "senses.dat")
        self.wsd = freeling.ukb(self.lpath + "ukb.dat")
        self.parser = freeling.dep_treeler(
            self.lpath + "dep_treeler/dependences.dat"
        )
def setup_freeling():
    """
    Build the FreeLing components used for pre-processing.

    The data directory and the language folder are taken from the ``Data``
    and ``Lang`` keys of the ``[FREELING]`` section of ``config.ini``. The
    morphological analyzer options are created for ``"es"`` regardless of the
    configured ``Lang``. If your FreeLing installation differs from the usual
    layout, adjust the paths in ``config.ini``.

    :return: tuple ``(tokenizer, splitter, user map, morphological analyzer)``
    """
    config = configparser.ConfigParser()
    config.read('config.ini')
    pyfreeling.util_init_locale('default')

    # Resolve the two path prefixes once instead of on every call below.
    settings = config['FREELING']
    data_dir = settings['Data']
    lang_dir = data_dir + settings['Lang']

    # Tokenizer and user map come from the language's "twitter" sub-folder.
    tk = pyfreeling.tokenizer(lang_dir + '/twitter/tokenizer.dat')
    sp = pyfreeling.splitter(lang_dir + '/splitter.dat')
    umap = pyfreeling.RE_map(lang_dir + '/twitter/usermap.dat')

    op = pyfreeling.maco_options("es")
    data_files = (
        "",
        data_dir + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )
    op.set_data_files(*data_files)

    mf = pyfreeling.maco(op)
    active_flags = (
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )
    mf.set_active_options(*active_flags)

    return tk, sp, umap, mf
Exemplo n.º 3
0
def load_modules():
    """Create the FreeLing analyzers and store them in ``FREELING_MODULES``.

    Entries written: ``"tk"`` (tokenizer), ``"sp"`` (splitter), ``"sid"``
    (session opened on the splitter), ``"mf"`` (morphological analyzer),
    ``"tg"`` (HMM tagger) and, only when the chunker grammar file exists,
    ``"parser"`` (chart parser). Data files are looked up under
    ``FREELING_DATA_DIR`` for the language ``FREELING_LANG``.
    """
    freeling.util_init_locale("default")

    lang_dir = FREELING_DATA_DIR + FREELING_LANG

    op = freeling.maco_options(FREELING_LANG)
    op.set_data_files("",
                      FREELING_DATA_DIR + "common/punct.dat",
                      lang_dir + "/dicc.src",
                      lang_dir + "/afixos.dat",
                      "",
                      lang_dir + "/locucions.dat",
                      lang_dir + "/np.dat",
                      lang_dir + "/quantities.dat",
                      lang_dir + "/probabilitats.dat")

    FREELING_MODULES["tk"] = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    FREELING_MODULES["sp"] = freeling.splitter(lang_dir + "/splitter.dat")
    FREELING_MODULES["sid"] = FREELING_MODULES["sp"].open_session()
    FREELING_MODULES["mf"] = freeling.maco(op)
    FREELING_MODULES["mf"].set_active_options(False, True, True, True,
                                              True, True, False, True,
                                              True, True, True, True)
    FREELING_MODULES["tg"] = freeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)

    # The chunker grammar is optional. The previous guard tested
    # os.path.isdir() on a misspelled ".../chucker/..." path, which could
    # never be true, so the parser was never created. Test the same file that
    # is handed to chart_parser, and test it as a regular file.
    grammar_path = lang_dir + "/chunker/grammar-chunk.dat"
    if os.path.isfile(grammar_path):
        FREELING_MODULES["parser"] = freeling.chart_parser(grammar_path)
Exemplo n.º 4
0
    def inicio(self):
        """Tokenize and sentence-split ``self.texto``, then process the result.

        The tokenizer and splitter are loaded from the ``es`` data folder
        under ``/usr/local/share/freeling/``.

        :return: whatever ``self.ProcessSentences`` returns for the list of
            sentences produced by the splitter.
        """
        # set locale to an UTF8 compatible locale
        pyfreeling.util_init_locale("default")

        # language and installation prefix are fixed for this method
        lang = "es"
        ipath = "/usr/local"

        # path to language data
        lpath = ipath + "/share/freeling/"

        # create analyzers
        tk = pyfreeling.tokenizer(lpath + lang + "/" + "tokenizer.dat")
        sp = pyfreeling.splitter(lpath + lang + "/" + "splitter.dat")

        sid = sp.open_session()
        try:
            # tokenize the input text into a list of words
            lw = tk.tokenize(self.texto)

            # split the word list into sentences
            # NOTE(review): flush is False, so the splitter may hold back a
            # trailing sentence that lacks closing punctuation; confirm that
            # this is intended, since the session is closed right afterwards.
            ls = sp.split(sid, lw, False)
        finally:
            # close the session even if tokenizing or splitting raised
            sp.close_session(sid)

        return self.ProcessSentences(ls)
Exemplo n.º 5
0
def load_freeling(valor):
    """Create the FreeLing analyzers for the ``pt`` data set and bundle them.

    :param valor: flag handed to the morphological analyzer for both
        MultiwordsDetection and NERecognition.
    :return: the object built by
        ``freeling_values(lpath, tk, sp, morfo, tagger)``.
    """
    pyfreeling.util_init_locale("default")

    # Language code and installation prefix are fixed here.
    lang = "pt"
    ipath = "/usr/local"
    lpath = "%s/share/freeling/%s/" % (ipath, lang)

    tk = pyfreeling.tokenizer(lpath + "tokenizer.dat")
    sp = pyfreeling.splitter(lpath + "splitter.dat")

    # Build the morphological analyzer, then choose its active submodules.
    morfo = pyfreeling.maco(my_maco_options(lang, lpath))
    submodule_flags = (
        False,  # UserMap
        True,   # NumbersDetection
        True,   # PunctuationDetection
        False,  # DatesDetection
        True,   # DictionarySearch
        True,   # AffixAnalysis
        False,  # CompoundAnalysis
        True,   # RetokContractions
        valor,  # MultiwordsDetection
        valor,  # NERecognition
        True,   # QuantitiesDetection
        True,   # ProbabilityAssignment
    )
    morfo.set_active_options(*submodule_flags)

    tagger = pyfreeling.hmm_tagger(lpath + "tagger.dat", True, 2)

    return freeling_values(lpath, tk, sp, morfo, tagger)
Exemplo n.º 6
0
    def __init__(self):
        """Load the FreeLing analyzers for language code ``en``.

        The instance ends up with a tokenizer (``tk``), splitter (``sp``),
        morphological analyzer (``morfo``), HMM tagger (``tagger``), sense
        annotator (``sen``), sense disambiguator (``wsd``) and dependency
        parser (``parser``), all reading data from ``self.lpath``.
        """
        freeling.util_init_locale("default")

        language, prefix = "en", "/usr/local"
        self.lang = language
        self.ipath = prefix
        self.lpath = prefix + "/share/freeling/" + language + "/"

        self.tk = freeling.tokenizer(self.lpath + "tokenizer.dat")
        self.sp = freeling.splitter(self.lpath + "splitter.dat")

        # Morphological analyzer: options come from my_maco_options, then the
        # submodules are toggled in the positional order listed here.
        self.morfo = freeling.maco(self.my_maco_options(self.lang, self.lpath))
        module_switches = [
            ("UserMap", False),
            ("NumbersDetection", False),
            ("PunctuationDetection", True),
            ("DatesDetection", False),
            ("DictionarySearch", True),
            ("AffixAnalysis", True),
            ("CompoundAnalysis", False),
            ("RetokContractions", True),
            ("MultiwordsDetection", False),
            ("NERecognition", True),
            ("QuantitiesDetection", False),
            ("ProbabilityAssignment", True),
        ]
        self.morfo.set_active_options(
            *[enabled for _name, enabled in module_switches]
        )

        # Tagger.
        self.tagger = freeling.hmm_tagger(self.lpath + "tagger.dat", True, 2)

        # Sense annotator, sense disambiguator, dependency parser.
        self.sen = freeling.senses(self.lpath + "senses.dat")
        self.wsd = freeling.ukb(self.lpath + "ukb.dat")
        parser_data = self.lpath + "dep_treeler/dependences.dat"
        self.parser = freeling.dep_treeler(parser_data)
Exemplo n.º 7
0
# Initialise the FreeLing locale with the "default" setting.
pyfreeling.util_init_locale("default")

# Language code: first command-line argument, "en" when absent.
lang = sys.argv[1] if len(sys.argv) > 1 else "en"

# Installation prefix: second command-line argument, "/home/flng" when absent.
ipath = sys.argv[2] if len(sys.argv) > 2 else "/home/flng"

# Folder holding the data files of the selected language.
lpath = "%s/share/freeling/%s/" % (ipath, lang)

# Tokenizer and sentence splitter.
tk = pyfreeling.tokenizer(lpath + "tokenizer.dat")
sp = pyfreeling.splitter(lpath + "splitter.dat")

# Morphological analyzer, built from the options my_maco_options returns.
morfo = pyfreeling.maco(my_maco_options(lang, lpath))
#  then, (de)activate required modules
morfo.set_active_options(
    False,  # UserMap 
    True,  # NumbersDetection,  
    True,  # PunctuationDetection,   
    True,  # DatesDetection,    
    True,  # DictionarySearch,  
    True,  # AffixAnalysis,  
    False,  # CompoundAnalysis, 
    True,  # RetokContractions,
    True,  # MultiwordsDetection,  
    True,  # NERecognition,     
Exemplo n.º 8
0
# Language identifier. Nothing in the set-up below reads it; the rest of this
# set-up works with the fixed language LANG.
la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")

# Options for the morphological analyzer: defaults are kept, only the data
# file locations are supplied.
LANG = "es"
_lang_data = DATA + LANG
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    _lang_data + "/dicc.src",
    _lang_data + "/afixos.dat",
    "",
    _lang_data + "/locucions.dat",
    _lang_data + "/np.dat",
    _lang_data + "/quantities.dat",
    _lang_data + "/probabilitats.dat",
)

# Tokenizer, splitter (with an open session) and morphological analyzer.
tk = pyfreeling.tokenizer(_lang_data + "/tokenizer.dat")
sp = pyfreeling.splitter(_lang_data + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)

# activate morpho modules to be used in next call
mf.set_active_options(
    False,
    True,
    True,
    True,  # select which among created
    True,
    True,
    True,
    True,  # submodules are to be used.
    False,
    False,
Exemplo n.º 9
0
# Options for the morphological analyzer: defaults are kept, only the data
# file locations are supplied.
LANG = "es"
_lang_data = DATA + LANG
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    _lang_data + "/dicc.src",
    _lang_data + "/afixos.dat",
    "",
    _lang_data + "/locucions.dat",
    _lang_data + "/np.dat",
    _lang_data + "/quantities.dat",
    _lang_data + "/probabilitats.dat",
)

# Tokenizer, splitter (with an open session) and morphological analyzer.
tk = pyfreeling.tokenizer(_lang_data + "/tokenizer.dat")
sp = pyfreeling.splitter(_lang_data + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)

# Choose which of the created morphological submodules run in later calls.
mf.set_active_options(
    False, True, True, True,
    True, True, False, True,
    True, True, True, True,
)

# Tagger, sense annotator, chart parser and dependency parser. The dependency
# parser is given the start symbol of the chart parser's grammar.
tg = pyfreeling.hmm_tagger(_lang_data + "/tagger.dat", True, 2)
sen = pyfreeling.senses(_lang_data + "/senses.dat")
parser = pyfreeling.chart_parser(_lang_data + "/chunker/grammar-chunk.dat")
dep = pyfreeling.dep_txala(
    _lang_data + "/dep_txala/dependences.dat",
    parser.get_start_symbol(),
)

# process input text
Exemplo n.º 10
0
def set_up_analyzer():
    """Create the FreeLing analyzers for language code ``ru`` and return them.

    The installation prefix is read from the ``FREELINGDIR`` environment
    variable. When the variable is not set, a platform default is stored in
    ``os.environ["FREELINGDIR"]``; if ``<default>/share/freeling`` does not
    exist, an error is written to stderr and the process exits with status 1.

    :return: tuple ``(tk, sp, sid, mf, tg, sen)``: tokenizer, splitter,
        session opened on the splitter, morphological analyzer, HMM tagger
        and sense annotator.
    """
    # Work out where the FreeLing data files live.
    if "FREELINGDIR" not in os.environ:
        if sys.platform in ("win32", "win64"):
            default_dir = "C:\\Program Files"
        else:
            default_dir = "/usr/local"
        os.environ["FREELINGDIR"] = default_dir

        share_dir = default_dir + "/share/freeling"
        if not os.path.exists(share_dir):
            # Previously the process exited here without any message, which
            # made a missing installation hard to diagnose.
            sys.stderr.write(
                "Folder " + share_dir + " not found.\n"
                "Please set FREELINGDIR environment variable to FreeLing "
                "installation directory\n"
            )
            sys.exit(1)

    # Location of FreeLing configuration files.
    DATA = os.environ["FREELINGDIR"] + "/share/freeling/"

    # Init locales
    pyfreeling.util_init_locale("default")

    # Options for the morphological analyzer: defaults are kept, only the
    # data file locations are supplied.
    LANG = "ru"
    lang_dir = DATA + LANG
    op = pyfreeling.maco_options(LANG)
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    # Tokenizer, splitter (with an open session) and morphological analyzer.
    tk = pyfreeling.tokenizer(lang_dir + "/tokenizer.dat")
    sp = pyfreeling.splitter(lang_dir + "/splitter.dat")
    sid = sp.open_session()
    mf = pyfreeling.maco(op)

    # Choose which of the created morphological submodules run in later calls.
    mf.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    # Tagger and sense annotator.
    # NOTE(review): the chart parser and dep_txala dependency parser were
    # commented out in the previous version and are not created here.
    tg = pyfreeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)
    sen = pyfreeling.senses(lang_dir + "/senses.dat")

    return (tk, sp, sid, mf, tg, sen)