def __init__(self):

    freeling.util_init_locale("default")
    self.lang = "en"
    self.ipath = "/usr/local"
    self.lpath = self.ipath + "/share/freeling/" + self.lang + "/"
    self.tk = freeling.tokenizer(self.lpath + "tokenizer.dat")
    self.sp = freeling.splitter(self.lpath + "splitter.dat")

    # create the analyzer with the required set of maco_options
    self.morfo = freeling.maco(self.my_maco_options(self.lang, self.lpath))
    # then, (de)activate the required modules
    self.morfo.set_active_options(
        False,  # UserMap
        False,  # NumbersDetection
        True,   # PunctuationDetection
        False,  # DatesDetection
        True,   # DictionarySearch
        True,   # AffixAnalysis
        False,  # CompoundAnalysis
        True,   # RetokContractions
        False,  # MultiwordsDetection
        True,   # NERecognition
        False,  # QuantitiesDetection
        True)   # ProbabilityAssignment

    # create tagger
    self.tagger = freeling.hmm_tagger(self.lpath + "tagger.dat", True, 2)

    # create sense annotator
    self.sen = freeling.senses(self.lpath + "senses.dat")
    # create sense disambiguator
    self.wsd = freeling.ukb(self.lpath + "ukb.dat")
    # create dependency parser
    self.parser = freeling.dep_treeler(self.lpath + "dep_treeler/dependences.dat")
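
A minimal usage sketch for the constructor above (an assumption, not part of the original snippet: the wrapper class is called NLPWrapper here purely for illustration, and my_maco_options() is assumed to return a configured freeling.maco_options object):

nlp = NLPWrapper()                  # hypothetical name for the class above
lw = nlp.tk.tokenize("FreeLing is a language analysis tool suite.")
ls = nlp.sp.split(lw)               # list of sentences
ls = nlp.morfo.analyze(ls)          # morphological analysis
ls = nlp.tagger.analyze(ls)         # PoS tagging
ls = nlp.sen.analyze(ls)            # sense annotation
ls = nlp.wsd.analyze(ls)            # word sense disambiguation (UKB)
ls = nlp.parser.analyze(ls)         # dependency parsing (treeler)
for s in ls:
    for w in s.get_words():
        print(w.get_form(), w.get_lemma(), w.get_tag())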
Example #2
def process_file(essay_lst, x):
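    # x selects which global list receives the results:
    # 2 -> essays_vacation_tagged, 3 -> essays_famous_tagged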
    index = 1
    for entry in essay_lst:
        essay = entry[1]
        essay_id = entry[0]

        # create tagger
        tagger = freeling.hmm_tagger(lpath + "tagger.dat", True, 2)

        # create sense annotator
        sen = freeling.senses(lpath + "senses.dat")

        # create sense disambiguator
        wsd = freeling.ukb(lpath + "ukb.dat")

        # create chart parser and dependency parser
        parser = freeling.chart_parser(lpath + "/chunker/grammar-chunk.dat")
        dep = freeling.dep_txala(lpath + "/dep_txala/dependences.dat",
                                 parser.get_start_symbol())

        # tokenize input line into a list of words
        lw = tk.tokenize(essay)
        # split list of words in sentences, return list of sentences
        ls = sp.split(lw)

        # perform morphosyntactic analysis and disambiguation
        ls = morfo.analyze(ls)
        ls = tagger.analyze(ls)

        # annotate and disambiguate senses
        ls = sen.analyze(ls)
        ls = wsd.analyze(ls)
        # parse sentences
        ls = parser.analyze(ls)
        ls = dep.analyze(ls)

        # do whatever is needed with processed sentences
        if x == 2:
            essays_vacation_tagged.append((essay_id, ProcessSentences(ls)))
        elif x == 3:
            essays_famous_tagged.append((essay_id, ProcessSentences(ls)))
        print(index)
        index += 1
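
ProcessSentences() is not defined in this snippet; a plausible minimal version (an assumption, shown only to make the example self-contained) flattens each sentence into (form, lemma, tag) triples. Note also that the tagger, sense annotator, disambiguator, and parsers above are rebuilt for every essay; they could be created once before the loop.

def ProcessSentences(ls):
    # hypothetical helper: collect (form, lemma, tag) for every word
    tagged = []
    for s in ls:
        for w in s.get_words():
            tagged.append((w.get_form(), w.get_lemma(), w.get_tag()))
    return tagged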
Example #3

    def __init__(self):

        freeling.util_init_locale("default")
        self.lang = "en"
        self.ipath = "/usr/local"
        self.lpath = self.ipath + "/share/freeling/" + self.lang + "/"
        self.tk = freeling.tokenizer(self.lpath + "tokenizer.dat")
        self.sp = freeling.splitter(self.lpath + "splitter.dat")

        # create the analyzer with the required set of maco_options
        self.morfo = freeling.maco(self.my_maco_options(self.lang, self.lpath))
        #  then, (de)activate required modules
        self.morfo.set_active_options(
            False,  # UserMap
            False,  # NumbersDetection
            True,   # PunctuationDetection
            False,  # DatesDetection
            True,   # DictionarySearch
            True,   # AffixAnalysis
            False,  # CompoundAnalysis
            True,   # RetokContractions
            False,  # MultiwordsDetection
            True,   # NERecognition
            False,  # QuantitiesDetection
            True)   # ProbabilityAssignment
        # create tagger
        self.tagger = freeling.hmm_tagger(self.lpath + "tagger.dat", True, 2)

        # create sense annotator
        self.sen = freeling.senses(self.lpath + "senses.dat")
        # create sense disambiguator
        self.wsd = freeling.ukb(self.lpath + "ukb.dat")
        # create dependency parser
        self.parser = freeling.dep_treeler(self.lpath +
                                           "dep_treeler/dependences.dat")
Example #4
    True,   # DatesDetection
    True,   # DictionarySearch
    True,   # AffixAnalysis
    False,  # CompoundAnalysis
    True,   # RetokContractions
    True,   # MultiwordsDetection
    True,   # NERecognition
    False,  # QuantitiesDetection
    True)   # ProbabilityAssignment

# create tagger
tagger = pyfreeling.hmm_tagger(lpath + "tagger.dat", True, 2)

# create sense annotator
sen = pyfreeling.senses(lpath + "senses.dat")
# create sense disambiguator
wsd = pyfreeling.ukb(lpath + "ukb.dat")
# create dependency parser
parser = pyfreeling.dep_treeler(lpath + "treeler/dependences.dat")

# process input text
text = "".join(sys.stdin.readlines())

# tokenize input line into a list of words
lw = tk.tokenize(text)
# split list of words in sentences, return list of sentences
ls = sp.split(lw)

# perform morphosyntactic analysis and disambiguation
ls = morfo.analyze(ls)
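
The snippet ends after morphological analysis; a sketch of a typical continuation (an assumption, using only the modules created above: tagger, sen, wsd, parser) would be:

ls = tagger.analyze(ls)    # PoS tagging
ls = sen.analyze(ls)       # sense annotation
ls = wsd.analyze(ls)       # word sense disambiguation (UKB)
ls = parser.analyze(ls)    # dependency parsing (treeler)
for s in ls:
    for w in s.get_words():
        print(w.get_form(), w.get_lemma(), w.get_tag())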
Example #5
    False, True, True, True,      # select which among created
    True, True, True, True,       # submodules are to be used.
    False, False, True, True)     # default: all created submodules are used

# create tagger, sense annotator, and parsers
tg = pyfreeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
sen = pyfreeling.senses(DATA + LANG + "/senses.dat")
parser = pyfreeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
dep = pyfreeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat",
                           parser.get_start_symbol())


def distribution(hospital_files_names):
    dist_hospitals = {}
    for name, files in hospital_files_names.items():
        if name != 'all':
            dist_hospitals[name] = len(files)

    return dist_hospitals
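
A small illustrative call (an assumption about the input structure: hospital name mapped to a list of files, with an aggregate 'all' key that is skipped):

# hypothetical input; prints {'hospital_a': 2, 'hospital_b': 1}
print(distribution({
    'hospital_a': ['a1.xml', 'a2.xml'],
    'hospital_b': ['b1.xml'],
    'all': ['a1.xml', 'a2.xml', 'b1.xml'],
}))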


def file_size(hospitals_files_name, dir_annotated_corpora):
Example #6
                   DATA + LANG + "/probabilitats.dat");

# create analyzers
tk=pyfreeling.tokenizer(DATA+LANG+"/tokenizer.dat");
sp=pyfreeling.splitter(DATA+LANG+"/splitter.dat");
sid=sp.open_session();
mf=pyfreeling.maco(op);

# activate mmorpho odules to be used in next call
mf.set_active_options(False, True, True, True,  # select which among created 
                      True, True, False, True,  # submodules are to be used. 
                      True, True, True, True ); # default: all created submodules are used

# create tagger, sense anotator, and parsers
tg=pyfreeling.hmm_tagger(DATA+LANG+"/tagger.dat",True,2);
sen=pyfreeling.senses(DATA+LANG+"/senses.dat");
parser= pyfreeling.chart_parser(DATA+LANG+"/chunker/grammar-chunk.dat");
dep=pyfreeling.dep_txala(DATA+LANG+"/dep_txala/dependences.dat", parser.get_start_symbol());

# process input text
lin=sys.stdin.readline();

print ("Text language is: "+la.identify_language(lin)+"\n");

while (lin) :
        
    l = tk.tokenize(lin);
    ls = sp.split(sid,l,False);

    ls = mf.analyze(ls);
    ls = tg.analyze(ls);
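
The snippet is cut off inside the while loop, so lin is never re-read. A sketch of the usual ending (an assumption based on the standard FreeLing sample flow) is:

    # ... sense annotation, parsing, and output would go here ...
    lin = sys.stdin.readline()   # read next line so the loop can terminate

sp.close_session(sid)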
Example #7
import os
import sys

import pyfreeling


def set_up_analyzer():
    # Check whether we know where to find FreeLing data files
    if "FREELINGDIR" not in os.environ:
        if sys.platform == "win32" or sys.platform == "win64":
            os.environ["FREELINGDIR"] = "C:\\Program Files"
        else:
            os.environ["FREELINGDIR"] = "/usr/local"
        print("FREELINGDIR environment variable not defined, trying",
              os.environ["FREELINGDIR"], file=sys.stderr)

    if not os.path.exists(os.environ["FREELINGDIR"] + "/share/freeling"):
        print("Folder", os.environ["FREELINGDIR"] + "/share/freeling",
              "not found.\nPlease set FREELINGDIR environment variable "
              "to FreeLing installation directory", file=sys.stderr)
        sys.exit(1)

    # Location of FreeLing configuration files.
    DATA = os.environ["FREELINGDIR"] + "/share/freeling/"

    # Init locales
    pyfreeling.util_init_locale("default")

    # create language detector (built here only for completeness; its result is
    # not used in this function, and the language is simply assumed to be LANG)
    la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")

    # create options set for maco analyzer. Default values are Ok, except for data files.
    LANG = "ru"
    op = pyfreeling.maco_options(LANG)
    op.set_data_files("", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
                      DATA + LANG + "/afixos.dat", "",
                      DATA + LANG + "/locucions.dat", DATA + LANG + "/np.dat",
                      DATA + LANG + "/quantities.dat",
                      DATA + LANG + "/probabilitats.dat")

    # create analyzers
    tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
    sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
    sid = sp.open_session()
    mf = pyfreeling.maco(op)

    # activate morpho modules to be used in next call
    mf.set_active_options(False, True, True, True,    # select which among created
                          True, True, False, True,    # submodules are to be used.
                          True, True, True, True)     # default: all created submodules are used

    # create tagger and sense annotator (the parsers are left disabled here)
    tg = pyfreeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
    sen = pyfreeling.senses(DATA + LANG + "/senses.dat")
    # parser = pyfreeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
    # dep = pyfreeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat", parser.get_start_symbol())

    return (tk, sp, sid, mf, tg, sen)
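
A minimal usage sketch of set_up_analyzer() (an assumption; the Russian sample sentence and the printed fields are illustrative only):

tk, sp, sid, mf, tg, sen = set_up_analyzer()

text = "Мама мыла раму."          # any Russian input line
lw = tk.tokenize(text)             # tokenize into a word list
ls = sp.split(sid, lw, True)       # split into sentences, flushing the buffer
ls = mf.analyze(ls)                # morphological analysis
ls = tg.analyze(ls)                # PoS tagging
ls = sen.analyze(ls)               # sense annotation

for s in ls:
    for w in s.get_words():
        print(w.get_form(), w.get_lemma(), w.get_tag())

sp.close_session(sid)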