Пример #1
0
    def __init__(self, dir_freeling=None):
        """Set up FreeLing data directories and a language identifier.

        dir_freeling: optional path to the FreeLing data directory;
        falls back to the system-wide install location when omitted.
        """
        self.data_dir = "/usr/share/freeling/" if dir_freeling is None else dir_freeling
        self.data_dir_common = self.data_dir + "common/"

        # Locale must be initialised before any FreeLing module is created.
        freeling.util_init_locale("default")
        self.la = freeling.lang_ident(self.data_dir_common + "lang_ident/ident.dat")
Пример #2
0
    def __init__(self, text):
        """Build the full FreeLing pipeline: tokenizer, splitter, morphological
        analyzer, tagger, sense annotator and (dependency) parsers."""
        super().__init__(text)
        freeling.util_init_locale("default")
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # Common prefix for the language-specific data files.
        base = DATA + LANG
        op = freeling.maco_options("es")
        op.set_data_files(
            "",
            DATA + "common/punct.dat",
            base + "/dicc.src",
            base + "/afixos.dat",
            "",
            base + "/locucions.dat",
            base + "/np.dat",
            base + "/quantities.dat",
            base + "/probabilitats.dat",
        )

        # analyzers
        self.tk = freeling.tokenizer(base + "/tokenizer.dat")
        self.sp = freeling.splitter(base + "/splitter.dat")
        self.sid = self.sp.open_session()
        self.mf = freeling.maco(op)

        # Select which morphological submodules run on the next analyze() call.
        self.mf.set_active_options(
            False,  # umap: user map module
            True,   # num: number detection
            True,   # pun: punctuation detection
            True,   # dat: date detection
            True,   # dic: dictionary search
            True,   # aff: affix analysis
            False,  # com: compound analysis
            True,   # rtk: retokenize contractions
            True,   # mw: multiword recognition
            True,   # ner: named-entity recognition
            True,   # qt: quantity recognition
            True,   # prb: probability assignment and guesser
        )  # default: all created submodules are used

        # tagger, sense annotator, chart parser and dependency parser
        self.tg = freeling.hmm_tagger(base + "/tagger.dat", True, 2)
        self.sen = freeling.senses(base + "/senses.dat")
        self.parser = freeling.chart_parser(base + "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(base + "/dep_txala/dependences.dat",
                                      self.parser.get_start_symbol())
Пример #3
0
    def inicializa(self):
        """Initialise the FreeLing analyzers for self.lang and open the
        MySQL connection used by this object."""
        FREELINGDIR = "/usr/local"
        DATA = FREELINGDIR + "/share/freeling/"
        LANG = self.lang
        # Common prefix for the language-specific data files.
        base = DATA + LANG

        freeling.util_init_locale("default")

        # language identifier
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # options for the maco (morphological) analyzer
        op = freeling.maco_options("es")
        op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        op.set_data_files(
            "",
            base + "/locucions.dat",
            base + "/quantities.dat",
            base + "/afixos.dat",
            base + "/probabilitats.dat",
            base + "/dicc.src",
            base + "/np.dat",
            DATA + "common/punct.dat",
            base + "/corrector/corrector.dat",
        )

        # analyzers
        self.tk = freeling.tokenizer(base + "/tokenizer.dat")
        self.sp = freeling.splitter(base + "/splitter.dat")
        self.mf = freeling.maco(op)

        self.tg = freeling.hmm_tagger(base + "/tagger.dat", 1, 2)
        self.sen = freeling.senses(base + "/senses.dat")
        self.nec = freeling.nec(base + "/nerc/nec/nec-ab-rich.dat")
        # self.ner=freeling.nec(DATA+LANG+"/ner/ner-ab.dat");

        self.parser = freeling.chart_parser(base + "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(base + "/dep/dependences.dat",
                                      self.parser.get_start_symbol())

        # NOTE(review): credentials look like placeholders — confirm they are
        # loaded from configuration in the real deployment.
        con_data = {
            'user': '******',
            'password': '******',
            'host': '127.0.0.1',
            'database': 'agiria',
            'raise_on_warnings': True,
            'autocommit': True,
            'buffered': True,
        }
        self.con = my.connect(**con_data)
Пример #4
0
    def inicializa(self):
        """Initialise all FreeLing analyzers for self.lang and connect to MySQL.

        Side effects: creates the tokenizer/splitter/maco/tagger/senses/NEC/
        parser attributes on self and opens the database handle self.con.
        """
        FREELINGDIR = "/usr/local"

        DATA = FREELINGDIR + "/share/freeling/"
        LANG = self.lang

        freeling.util_init_locale("default")

        # create language identifier
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # options for the maco (morphological) analyzer
        op = freeling.maco_options("es")
        op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        op.set_data_files("", DATA + LANG + "/locucions.dat",
                          DATA + LANG + "/quantities.dat",
                          DATA + LANG + "/afixos.dat",
                          DATA + LANG + "/probabilitats.dat",
                          DATA + LANG + "/dicc.src", DATA + LANG + "/np.dat",
                          DATA + "common/punct.dat",
                          DATA + LANG + "/corrector/corrector.dat")

        # create analyzers
        self.tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        self.mf = freeling.maco(op)

        self.tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", 1, 2)
        self.sen = freeling.senses(DATA + LANG + "/senses.dat")
        self.nec = freeling.nec(DATA + LANG + "/nerc/nec/nec-ab-rich.dat")
        # self.ner = freeling.nec(DATA + LANG + "/ner/ner-ab.dat")

        self.parser = freeling.chart_parser(DATA + LANG +
                                            "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(DATA + LANG + "/dep/dependences.dat",
                                      self.parser.get_start_symbol())

        # NOTE(review): credentials are placeholders; load the real ones from
        # configuration rather than hard-coding them here.
        con_data = {'user': '******', 'password': '******', 'host': '127.0.0.1',
                    'database': 'agiria', 'raise_on_warnings': True,
                    'autocommit': True, 'buffered': True}

        self.con = my.connect(**con_data)
Пример #5
0
# Load the list of common words (one per line, Latin-1 encoded).
# Fix: use a context manager so the file is closed deterministically
# (the original left the handle open), and build the list with a
# comprehension instead of a manual append loop.
with open("lexicon_total.txt", encoding="latin-1") as lexicon:
    p_comunes = [p.replace("\n", "") for p in lexicon]

FREELINGDIR = "/usr/local"

#sys.stdin = io.TextIOWrapper(sys.stdin.buffer,encoding='latin-1');
#sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8');

DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

# create language analyzer
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

# create options set for maco analyzer. Default values are Ok, except for data files.
op = freeling.maco_options("es")
op.set_data_files("",
                  DATA + "common/punct.dat",
                  DATA + LANG + "/dicc.src",
                  DATA + LANG + "/afixos.dat",
                  "",
                  DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/np.dat",
                  DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/probabilitats.dat")

# create analyzers
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
Пример #6
0
    def __init__(self):
        """French ('fr') complexity analyzer: FreeLing pipeline plus the
        Dale-Chall familiar-word list and metric configuration."""
        lang = 'fr'
        ComplexityLanguage.__init__(self, lang)

        ## Modify this line to be your FreeLing installation directory
        FREELINGDIR = "/home/garciacumbreras18/dist/freeling"
        DATA = FREELINGDIR + "/data/"
        CLASSDIR = ""
        self.lang = lang
        freeling.util_init_locale("default")

        # create language analyzer
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # create options set for maco analyzer. Default values are Ok, except for data files.
        op = freeling.maco_options(lang)
        op.set_data_files(
            "", DATA + "common/punct.dat", DATA + lang + "/dicc.src",
            DATA + lang + "/afixos.dat", "", DATA + lang + "/locucions.dat",
            DATA + lang + "/np.dat", DATA + lang + "/quantities.dat",
            DATA + lang + "/probabilitats.dat")

        # create analyzers
        self.tk = freeling.tokenizer(DATA + lang + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + lang + "/splitter.dat")
        self.mf = freeling.maco(op)

        # activate morpho modules to be used in next call
        self.mf.set_active_options(
            False,  # user map
            True,   # number detection
            True,   # punctuation detection
            True,   # date detection
            True,   # dictionary search
            True,   # affix analysis
            False,  # compound analysis
            True,   # retokenize contractions
            True,   # multiword recognition
            True,   # named-entity recognition
            True,   # quantity recognition
            True)   # probability assignment and guesser

        # create tagger and sense annotator
        self.tg = freeling.hmm_tagger(DATA + lang + "/tagger.dat", True, 2)
        self.sen = freeling.senses(DATA + lang + "/senses.dat")

        # Dale-Chall word list: one or more words per line.
        # Fix: context manager closes the file even on error (the original
        # open/readlines/close leaked on exception); the nested comprehension
        # replaces the manual accumulation loop.
        with open(CLASSDIR + '/home/garciacumbreras18/DaleChall.txt') as f:
            self.listDaleChall = [w for line in f for w in line.strip().split()]
        """
        config es una lista de valores booleanos que activa o desactivan el cálculo de una medida
        config = [
            True|False,         # KANDEL MODELS
            True|False,         # DALE CHALL
            True|False,         # SOL
            ]
        """
        self.config += [True, True, True]
        self.metricsStr.extend(['KANDEL-MODELS', 'DALE CHALL', 'SOL'])

        self.configExtend += [True, True]
        self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
Пример #7
0

## ----------------------------------------------
## -------------    MAIN PROGRAM  ---------------
## ----------------------------------------------

## Modify this line to be your FreeLing installation directory
FREELINGDIR = "/usr/local"

DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

# language identifier
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

# maco options: defaults are fine except for the data-file paths
op = freeling.maco_options("es")
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# analyzers
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = freeling.maco(op)
Пример #8
0
    def __init__(self, lang='it'):
        """Complexity analyzer (Italian by default) built on FreeLing."""
        ## Modify this line to be your FreeLing installation directory
        FREELINGDIR = "/home/garciacumbreras18/dist/freeling"
        DATA = FREELINGDIR + "/data/"

        self.DATA = DATA
        self.lang = lang
        freeling.util_init_locale("default")

        # language identifier
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # Common prefix for the language-specific data files.
        prefix = self.DATA + self.lang

        # maco options: defaults are fine except for the data-file paths
        op = freeling.maco_options(lang)
        op.set_data_files("", self.DATA + "common/punct.dat",
                          prefix + "/dicc.src",
                          prefix + "/afixos.dat", "",
                          prefix + "/locucions.dat",
                          prefix + "/np.dat", "",
                          prefix + "/probabilitats.dat")

        # analyzers
        self.tk = freeling.tokenizer(prefix + "/tokenizer.dat")
        self.sp = freeling.splitter(prefix + "/splitter.dat")
        self.mf = freeling.maco(op)

        # Select which morphological submodules run on the next analyze() call.
        self.mf.set_active_options(
            False,  # user map
            True,   # number detection
            True,   # punctuation detection
            True,   # date detection
            True,   # dictionary search
            True,   # affix analysis
            False,  # compound analysis
            True,   # retokenize contractions
            True,   # multiword recognition
            True,   # named-entity recognition
            True,   # quantity recognition
            True)   # probability assignment and guesser
        # tagger and sense annotator
        self.tg = freeling.hmm_tagger(prefix + "/tagger.dat", True, 2)
        self.sen = freeling.senses(DATA + lang + "/senses.dat")
        """ 
        config es una lista de valores booleanos que activa o desactivan el cálculo de una medida
        config = [
            True|False,         # PUNCTUATION MARKS
            True|False,         # SCI
            True|False,         # ARI 
            True|False,         # MU
            True|False,         # Flesch-Vaca
            True|False,         # Gulpease
            ]
         Si config == None se calculan todas las métricas de complejidad soportadas
        """
        self.config = [True] * 6
        self.metricsIt = [
            'AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU', 'FLESCH-VACA',
            'GULPEASE'
        ]

        self.configExtend = [True] * 5
        self.metricsItExtend = [
            'MEAN WORDS', 'STD WORDS', 'COMPLEX SENTENCES', 'MEAN SYLLABLES',
            'STD SYLLABLES'
        ]
Пример #9
0
freeling.util_init_locale("default")

# maco options for Portuguese, with a user-map file and retokenization
# of contractions disabled.
op = freeling.maco_options("pt")
op.set_active_modules(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
op.set_data_files("usermap.dat",
                  DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/afixos.dat",
                  DATA + LANG + "/probabilitats.dat",
                  DATA + LANG + "/dicc.src",
                  DATA + LANG + "/np.dat",
                  DATA + "common/punct.dat",
                  "")
op.set_retok_contractions(False)

# analyzers: language identifier, morphology, tokenizer, splitter,
# tagger, sense annotator and UKB word-sense disambiguator
lg = freeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")
mf = freeling.maco(op)
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", 1, 2)
sen = freeling.senses(DATA + LANG + "/senses.dat")
ukb = freeling.ukb(DATA + LANG + "/ukb.dat")
def tag (obj):
    # Run the pipeline over obj["text"]: identify its language, tokenize,
    # split into sentences, then morphological analysis and PoS tagging.
    # NOTE(review): this definition appears truncated in this excerpt —
    # `out` and `lang` are assigned but never used in the visible body,
    # and there is no return statement; confirm against the full source.
    sent = obj["text"]
    out = obj
    lang = lg.identify_language(sent)
    l = tk.tokenize(sent)
    ls = sp.split(l,1) # old value 0
    ls = mf.analyze(ls)
    ls = tg.analyze(ls)
Пример #10
0
    def fullParsing(self, text, sentimentText):
        """Parse *text* with FreeLing and locate *sentimentText* (the
        sentiment target) inside the resulting parse trees.

        Returns a (finalType, finalList) pair produced by self.getTypeNode
        for the first sentence that yields a non-None type, or (None, None)
        when nothing matches.
        """
        ## Modify this line to be your FreeLing installation directory
        FREELINGDIR = "/usr/local"

        DATA = FREELINGDIR + "/share/freeling/"
        LANG = "es"

        freeling.util_init_locale("default")

        # create language analyzer
        la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # create options set for maco analyzer. Default values are Ok, except for data files.
        op = freeling.maco_options("es")
        op.set_data_files(
            "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
            DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat",
            DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat",
            DATA + LANG + "/probabilitats.dat")

        # create analyzers
        tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        sid = sp.open_session()
        mf = freeling.maco(op)

        # activate morpho modules to be used in next call
        mf.set_active_options(
            False,              # user map
            True, True, True,   # number / punctuation / date detection
            True, True,         # dictionary search / affix analysis
            False,              # compound analysis
            True,               # retokenize contractions
            True, True, True,   # multiword / NER / quantity recognition
            True)               # probability assignment and guesser

        # Fix: close the splitter session even if analysis raises.
        try:
            # create tagger, sense annotator, and parsers
            tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
            sen = freeling.senses(DATA + LANG + "/senses.dat")
            parser = freeling.chart_parser(DATA + LANG +
                                           "/chunker/grammar-chunk.dat")
            dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat",
                                     parser.get_start_symbol())

            # Tokenize the sentiment target; strip a leading '@' handle marker.
            sentimentText += '.'
            if sentimentText.startswith('@'):
                sentimentText = sentimentText[1:]
            target = tk.tokenize(sentimentText)
            targets = sp.split(sid, target, True)

            targets = mf.analyze(targets)
            targets = parser.analyze(targets)
            targets = dep.analyze(targets)

            # Fix: initialise so an empty sentence split cannot leave
            # targetList unbound (NameError at the getTypeNode call below).
            targetList = []
            for s in targets:
                targetr = s.get_parse_tree()
                targetList = self.getTreeAsList(targetr, 0)
                del targetList[-1]

            # process input text
            # Fix: startswith handles empty input (lin[0] raised IndexError).
            lin = text
            if lin.startswith('@'):
                lin = lin[1:]

            l = tk.tokenize(lin)
            ls = sp.split(sid, l, True)

            ls = mf.analyze(ls)
            ls = parser.analyze(ls)
            ls = dep.analyze(ls)

            finalType = None
            finalList = None

            # Keep the result from the first sentence whose tree matches.
            for s in ls:
                tr = s.get_parse_tree()
                wordType, wordList = self.getTypeNode(tr, 0, targetList)
                if finalType is None and wordType is not None:
                    finalType = wordType
                    finalList = wordList
        finally:
            # clean up
            sp.close_session(sid)

        return finalType, finalList
Пример #11
0
    def __init__(self, text):
        """Tokenize, tag and filter *text*, keeping only content words.

        Runs the FreeLing pipeline over self.text and appends "word-TAG"
        strings to self._cleaned_text for every word whose PoS tag does not
        start with a stop category letter (S, I, C, F, D, P).
        """
        super().__init__(text)
        self.stop_words = set(stopwords.words('spanish') + list(punctuation))
        self._cleaned_text = list()
        freeling.util_init_locale("default")

        # create language analyzer
        la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # create options set for maco analyzer. Default values are Ok, except for data files.
        op = freeling.maco_options("es")
        op.set_data_files(
            "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
            DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat",
            DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat",
            DATA + LANG + "/probabilitats.dat")

        # create analyzers
        tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        sid = sp.open_session()
        mf = freeling.maco(op)

        # activate all morpho submodules for the next analyze() call
        mf.set_active_options(
            True, True, True, True,
            True, True, True, True,
            True, True, True, True)

        # Fix: ensure the splitter session opened above is always closed
        # (the original never called close_session).
        try:
            # create tagger, sense annotator, and parsers
            tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
            sen = freeling.senses(DATA + LANG + "/senses.dat")
            parser = freeling.chart_parser(DATA + LANG +
                                           "/chunker/grammar-chunk.dat")

            l = tk.tokenize(self.text)
            ls = sp.split(sid, l, False)

            ls = mf.analyze(ls)
            ls = tg.analyze(ls)
            ls = sen.analyze(ls)
            ls = parser.analyze(ls)

            # Drop stopped categories: adpositions (S), interjections (I),
            # conjunctions (C), punctuation (F), determiners (D), pronouns (P).
            # Fix: tuple startswith replaces the six-way or-chain and the
            # empty pass/else branch.
            skip_prefixes = ("S", "I", "C", "F", "D", "P")
            for s in ls:
                for w in s.get_words():
                    tag = w.get_tag()
                    if not tag.startswith(skip_prefixes):
                        self._cleaned_text.append("{}-{}".format(w.get_form(), tag))
        finally:
            sp.close_session(sid)