示例#1
0
    def calc_terms(self):
        """Compute this extractor's terms via ``Util.calc_local_regexp``.

        The pattern is resolved from ``self.kwargs`` by
        ``Util.get_the_regexp``; the text and the ``local_k`` setting are
        read from ``self.kwargs["string"]`` and ``self.kwargs["local_k"]``.

        Returns whatever ``Util.calc_local_regexp`` returns.
        """
        pattern = Util.get_the_regexp(self.kwargs)
        options = self.kwargs
        return Util.calc_local_regexp(options["string"], pattern,
                                      options["local_k"])
示例#2
0
    def calc_terms(self):
        """Strip the ignored terms from the text, then extract terms by regexp.

        Each line of the file at ``self.kwargs["path_ignored_terms"]`` is
        stripped and lower-cased, and every occurrence of it is removed from
        ``self.kwargs["string"]``.  The cleaned text is stored back into
        ``self.kwargs["string"]``, so the change persists after this call.

        Returns whatever ``Util.calc_regexp`` returns for the cleaned text
        and the pattern resolved by ``Util.get_the_regexp``.
        """
        # ``with`` guarantees the file is closed even if the loop body raises.
        with open(self.kwargs["path_ignored_terms"]) as f_ignored_terms:
            for line in f_ignored_terms:
                ignored_term = line.strip().lower()
                self.kwargs["string"] = self.kwargs["string"].replace(
                    ignored_term, "")
                # Trace output: echo every ignored term to stdout.
                print(ignored_term)

        the_regexp = Util.get_the_regexp(self.kwargs)

        return Util.calc_regexp(self.kwargs["string"], the_regexp)
示例#3
0
 def calc_terms(self):
     """Build the ambiguous-word set once, afterwards extract ambiguous words.

     While ``self.kwargs["boolBuildSetGlobal"]`` is truthy, the set is
     computed with ``Util.calc_ambiguous_words_set``, stored in
     ``self.kwargs["setAmbiguous"]``, the flag is cleared, the mode is set
     to ``EnumModes.MODE_CORPUS_POS`` and the set itself is returned.
     Otherwise the result of ``Util.calc_ambiguous_words`` for the text and
     the stored set is returned.  Trace banners are printed in both cases.
     """
     banner = "#####################"
     build_banner = "++++++++++++++++++++++#####################"

     print(banner)
     print(self.name)
     print(self.kwargs["boolBuildSetGlobal"])
     print(banner)

     options = self.kwargs
     if not options["boolBuildSetGlobal"]:
         # Set already built: use it to pick the ambiguous words.
         return Util.calc_ambiguous_words(options["string"],
                                          options["setAmbiguous"])

     print(build_banner)
     print(self.name)
     print(options["boolBuildSetGlobal"])
     print(build_banner)

     ambiguous_set = Util.calc_ambiguous_words_set(options["string"])
     options["setAmbiguous"] = ambiguous_set
     options["boolBuildSetGlobal"] = False
     options["mode"] = EnumModes.MODE_CORPUS_POS
     return options["setAmbiguous"]
示例#4
0
    def calc_terms(self):
        """Extract the first regexp group from every token of the text.

        The text ``self.kwargs["string"]`` is split with ``Util.calc_split``
        and each token is matched (anchored at its start, via ``re.match``)
        against the pattern.  The pattern defaults to ``".+_(.+)"``; when
        ``self.kwargs`` has a ``"regexp"`` entry, the pattern resolved by
        ``Util.get_the_regexp`` is used instead.

        Returns:
            list: group 1 of every matching token, in token order.  Tokens
            that do not match are skipped.
        """
        the_regexp = ".+_(.+)"
        if "regexp" in self.kwargs:
            the_regexp = Util.get_the_regexp(self.kwargs)

        tags = []
        for token in Util.calc_split(self.kwargs["string"]):
            match = re.match(the_regexp, token)
            if match is not None:
                tags.append(match.group(1))

        return tags
示例#5
0
 def calc_terms(self):
     """Return the ambiguous-word set on the build pass, else ambiguous words.

     On the build pass (``self.kwargs["boolBuildSetGlobal"]`` truthy) the
     set from ``Util.calc_ambiguous_words_set`` is stored under
     ``self.kwargs["setAmbiguous"]``, the flag is reset to ``False``,
     ``self.kwargs["mode"]`` becomes ``EnumModes.MODE_CORPUS_POS`` and the
     stored set is returned.  On later passes the text is handed to
     ``Util.calc_ambiguous_words`` together with the stored set.
     Trace banners are printed on every call.
     """
     plain_rule = "#####################"
     build_rule = "++++++++++++++++++++++#####################"

     print(plain_rule)
     print(self.name)
     print(self.kwargs["boolBuildSetGlobal"])
     print(plain_rule)

     settings = self.kwargs
     if settings["boolBuildSetGlobal"]:
         print(build_rule)
         print(self.name)
         print(settings["boolBuildSetGlobal"])
         print(build_rule)

         settings["setAmbiguous"] = Util.calc_ambiguous_words_set(
             settings['string'])
         settings["boolBuildSetGlobal"] = False
         settings["mode"] = EnumModes.MODE_CORPUS_POS
         return settings["setAmbiguous"]

     text = settings['string']
     known_ambiguous = settings['setAmbiguous']
     return Util.calc_ambiguous_words(text, known_ambiguous)
示例#6
0
    def calc_terms(self, kwargs, f_src):
        """Compute the SFM terms of one file from the parallel SFM corpus.

        ``kwargs["corpus"]`` is temporarily replaced by a
        ``LazyCorpusLoader`` for ``c50_term_SFM_23/<sub-path>``, where
        ``<sub-path>`` is the part of the original corpus root path that
        follows ``/home/aplm/nltk_data/corpora/c50/``.  The raw text of
        ``f_src`` in that corpus is passed to ``Util.calc_SFM``.

        The original corpus is put back into ``kwargs["corpus"]`` before
        this method exits, including when reading the file or computing the
        terms raises.

        Args:
            kwargs: mapping holding the current corpus under ``"corpus"``.
            f_src: file id whose raw text is read from the SFM corpus.

        Returns:
            Whatever ``Util.calc_SFM`` returns.

        Raises:
            AttributeError: if the original corpus root path does not start
                with the expected ``c50`` location (the match is ``None``).
        """
        # save the original corpus
        corpus_temp = kwargs["corpus"]

        groups = re.match(r'/home/aplm/nltk_data/corpora/c50/(.+)', corpus_temp.root.path)
        sfm_corpus = LazyCorpusLoader("c50_term_SFM_23/" + groups.group(1), CategorizedPlaintextCorpusReader, r'.+/.+', cat_pattern=r'(.+)/.+')

        kwargs["corpus"] = sfm_corpus
        try:
            return Util.calc_SFM(kwargs["corpus"].raw(fileids=[f_src]))
        finally:
            # restore the original corpus, even if the lookup above failed,
            # so the caller's kwargs never keep pointing at the SFM corpus
            kwargs["corpus"] = corpus_temp
示例#7
0
 def calc_terms(self):
     """Return ``Util.calc_sent_nostopwords_lenght`` for the text.

     The text is ``self.kwargs["string"]``; the pattern handed over is
     ``RegExps.W_H_C``.
     """
     text = self.kwargs["string"]
     return Util.calc_sent_nostopwords_lenght(text, RegExps.W_H_C)
示例#8
0
 def calc_terms(self):
     """Return ``Util.calc_token_lenght`` for the configured text.

     Text, pattern and template are read from ``self.kwargs["string"]``,
     ``self.kwargs["regexp"]`` and ``self.kwargs["template"]``.
     """
     options = self.kwargs
     text = options["string"]
     pattern = options["regexp"]
     template = options["template"]  # e.g. "word{len:%s}"
     return Util.calc_token_lenght(text, pattern, template)
示例#9
0
    def calc_terms(self):
        """Return ``Util.calc_ngrams_g`` for the text, pattern and ``nlen``.

        The pattern is resolved from ``self.kwargs`` by
        ``Util.get_the_regexp``; the text and the n-gram length setting come
        from ``self.kwargs["string"]`` and ``self.kwargs["nlen"]``.
        """
        pattern = Util.get_the_regexp(self.kwargs)
        text = self.kwargs["string"]
        length = self.kwargs["nlen"]
        return Util.calc_ngrams_g(text, pattern, length)
示例#10
0
 def calc_terms(self):
     """Return ``Util.calc_local_ngrams`` for the configured text.

     Arguments are taken from ``self.kwargs``: ``"string"``, ``"nlen"``
     and ``"local_k"``, in that order.
     """
     options = self.kwargs
     arguments = (options["string"], options["nlen"], options["local_k"])
     return Util.calc_local_ngrams(*arguments)
示例#11
0
 def calc_collocation(self):
     """Return ``Util.calc_trigram_collocation`` for the configured text.

     The text, stemming flag and collocation set are read from
     ``self.kwargs`` (``"string"``, ``"boolStem"``, ``"setCollocations"``);
     the pattern handed over is ``RegExps.STOPW_PUNTC``.
     """
     options = self.kwargs
     text = options["string"]
     stem = options["boolStem"]
     collocations = options["setCollocations"]
     return Util.calc_trigram_collocation(text, RegExps.STOPW_PUNTC, stem,
                                          collocations)
示例#12
0
 def calc_terms(self):
     """Split the text and pass the pieces to ``self.calc_ngrams_g``.

     ``self.kwargs["string"]`` is split with ``Util.calc_split``; the result
     is handed to ``self.calc_ngrams_g`` along with ``self.kwargs["nlen"]``.
     """
     pieces = Util.calc_split(self.kwargs["string"])
     return self.calc_ngrams_g(pieces, self.kwargs["nlen"])
示例#13
0
 def calc_terms(self):
     """Return ``Util.calc_split`` applied to ``self.kwargs["string"]``."""
     text = self.kwargs["string"]
     return Util.calc_split(text)
示例#14
0
    def calc_terms(self):
        """Return ``Util.calc_word_pair_opcion`` for the configured text.

        The pattern is resolved from ``self.kwargs`` by
        ``Util.get_the_regexp``; the text and the option value come from
        ``self.kwargs["string"]`` and ``self.kwargs["opcion"]``.
        """
        pattern = Util.get_the_regexp(self.kwargs)
        text = self.kwargs["string"]
        option = self.kwargs["opcion"]
        return Util.calc_word_pair_opcion(text, pattern, option)
示例#15
0
    def calc_terms(self):
        """Return ``Util.calc_markers`` for the configured text.

        The pattern is resolved from ``self.kwargs`` by
        ``Util.get_the_regexp``; the text and marker type come from
        ``self.kwargs["string"]`` and ``self.kwargs["type_marker"]``.
        """
        pattern = Util.get_the_regexp(self.kwargs)
        options = self.kwargs
        return Util.calc_markers(options["string"], pattern,
                                 options["type_marker"])
示例#16
0
 def calc_terms(self):
     """Return ``Util.calc_trigrams`` for the text with ``RegExps.STYLE_POS``.

     The text and the stemming flag are read from
     ``self.kwargs["string"]`` and ``self.kwargs["boolStem"]``.
     """
     text = self.kwargs["string"]
     stem = self.kwargs["boolStem"]
     return Util.calc_trigrams(text, RegExps.STYLE_POS, stem)
示例#17
0
 def calc_collocation(self):
     """Delegate to ``Util.calc_trigram_collocation``.

     Passes, in order: ``self.kwargs["string"]``, ``RegExps.STOPW_PUNTC``,
     ``self.kwargs["boolStem"]`` and ``self.kwargs["setCollocations"]``,
     and returns the helper's result unchanged.
     """
     settings = self.kwargs
     arguments = (
         settings["string"],
         RegExps.STOPW_PUNTC,
         settings["boolStem"],
         settings["setCollocations"],
     )
     return Util.calc_trigram_collocation(*arguments)
示例#18
0
 def calc_terms(self):
     """Delegate to ``Util.calc_local_ngrams`` and return its result.

     The text, ``nlen`` and ``local_k`` values are read from
     ``self.kwargs`` under the keys ``"string"``, ``"nlen"``, ``"local_k"``.
     """
     text = self.kwargs["string"]
     length = self.kwargs["nlen"]
     local_k = self.kwargs["local_k"]
     return Util.calc_local_ngrams(text, length, local_k)
示例#19
0
 def calc_collocation_set(self):
     """Return ``Util.calc_trigram_collocation_set`` for the configured text.

     The text and stemming flag come from ``self.kwargs["string"]`` and
     ``self.kwargs["boolStem"]``; the pattern handed over is
     ``RegExps.STOPW``.
     """
     text = self.kwargs["string"]
     stem = self.kwargs["boolStem"]
     return Util.calc_trigram_collocation_set(text, RegExps.STOPW, stem)
示例#20
0
    def calc_terms(self):
        """Return ``Util.calc_trigrams`` for the text and resolved pattern.

        The pattern is resolved from ``self.kwargs`` by
        ``Util.get_the_regexp``; the text is ``self.kwargs["string"]``.
        """
        pattern = Util.get_the_regexp(self.kwargs)
        text = self.kwargs["string"]
        return Util.calc_trigrams(text, pattern)
示例#21
0
 def calc_terms(self):
     """Return ``Util.calc_lazy_POS`` applied to ``self.kwargs["string"]``."""
     return Util.calc_lazy_POS(self.kwargs["string"])
示例#22
0
    def calc_terms(self):
        """Delegate to ``Util.calc_ngrams_g`` and return its result.

        Arguments, in order: ``self.kwargs["string"]``, the pattern resolved
        by ``Util.get_the_regexp(self.kwargs)`` and ``self.kwargs["nlen"]``.
        """
        resolved = Util.get_the_regexp(self.kwargs)
        settings = self.kwargs
        return Util.calc_ngrams_g(settings["string"], resolved,
                                  settings["nlen"])
示例#23
0
 def calc_terms(self):
     """Split ``self.kwargs["string"]`` with ``Util.calc_split``."""
     pieces = Util.calc_split(self.kwargs["string"])
     return pieces
示例#24
0
 def calc_collocation(self):
     """Return ``Util.calc_trigram_collocation`` for the configured text.

     All four arguments are read from ``self.kwargs``: ``"string"``,
     ``"regexp"``, ``"boolStem"`` and ``"setCollocations"``, in that order.
     """
     options = self.kwargs
     text = options["string"]
     pattern = options["regexp"]
     stem = options["boolStem"]
     collocations = options["setCollocations"]
     return Util.calc_trigram_collocation(text, pattern, stem, collocations)
示例#25
0
    def calc_terms(self):
        """Delegate to ``Util.calc_trigrams`` and return its result.

        Passes ``self.kwargs["string"]`` and the pattern that
        ``Util.get_the_regexp`` resolves from ``self.kwargs``.
        """
        resolved = Util.get_the_regexp(self.kwargs)
        return Util.calc_trigrams(
            self.kwargs["string"],
            resolved,
        )
示例#26
0
 def calc_terms(self):
     """Delegate to ``Util.calc_token_lenght`` and return its result.

     Passes ``self.kwargs["string"]``, ``self.kwargs["regexp"]`` and
     ``self.kwargs["template"]``, in that order.
     """
     settings = self.kwargs
     # Template example: "word{len:%s}"
     arguments = (settings["string"], settings["regexp"],
                  settings["template"])
     return Util.calc_token_lenght(*arguments)
示例#27
0
 def calc_collocation(self):
     """Delegate to ``Util.calc_trigram_collocation`` and return its result.

     Passes, in order, the ``self.kwargs`` entries ``"string"``,
     ``"regexp"``, ``"boolStem"`` and ``"setCollocations"``.
     """
     settings = self.kwargs
     arguments = (
         settings["string"],
         settings["regexp"],
         settings["boolStem"],
         settings["setCollocations"],
     )
     return Util.calc_trigram_collocation(*arguments)
示例#28
0
 def calc_terms(self):
     """Return ``Util.calc_sent_lenght`` for the configured text.

     Text, pattern and template are read from ``self.kwargs["string"]``,
     ``self.kwargs["regexp"]`` and ``self.kwargs["template"]``.
     """
     options = self.kwargs
     text = options["string"]
     pattern = options["regexp"]
     template = options["template"]  # e.g. "sentToken{len:%s}"
     return Util.calc_sent_lenght(text, pattern, template)
示例#29
0
 def calc_terms(self):
     """Delegate to ``Util.calc_sent_lenght`` and return its result.

     Passes ``self.kwargs["string"]``, ``self.kwargs["regexp"]`` and
     ``self.kwargs["template"]``, in that order.
     """
     settings = self.kwargs
     # Template example: "sentToken{len:%s}"
     arguments = (settings["string"], settings["regexp"],
                  settings["template"])
     return Util.calc_sent_lenght(*arguments)
示例#30
0
 def calc_terms(self):
     """Delegate to ``Util.calc_sent_nostopwords_lenght``.

     Passes ``self.kwargs["string"]`` and ``RegExps.W_H_C`` and returns the
     helper's result unchanged.
     """
     return Util.calc_sent_nostopwords_lenght(
         self.kwargs["string"],
         RegExps.W_H_C,
     )
示例#31
0
 def calc_terms(self):
     """Return ``Util.calc_bigrams`` for the text with ``RegExps.STOPW``.

     The text and the stemming flag are read from
     ``self.kwargs["string"]`` and ``self.kwargs["boolStem"]``.
     """
     text = self.kwargs["string"]
     stem = self.kwargs["boolStem"]
     return Util.calc_bigrams(text, RegExps.STOPW, stem)
示例#32
0
 def calc_terms(self):
     """Delegate to ``Util.calc_bigrams`` and return its result.

     Passes, in order: ``self.kwargs["string"]``, ``RegExps.STOPW`` and
     ``self.kwargs["boolStem"]``.
     """
     settings = self.kwargs
     arguments = (settings["string"], RegExps.STOPW, settings["boolStem"])
     return Util.calc_bigrams(*arguments)
示例#33
0
 def calc_collocation_set(self):
     """Delegate to ``Util.calc_trigram_collocation_set``.

     Passes, in order: ``self.kwargs["string"]``, ``RegExps.STOPW`` and
     ``self.kwargs["boolStem"]``, and returns the helper's result unchanged.
     """
     settings = self.kwargs
     arguments = (settings["string"], RegExps.STOPW, settings["boolStem"])
     return Util.calc_trigram_collocation_set(*arguments)
示例#34
0
 def calc_terms(self):
     """Delegate to ``Util.calc_trigrams`` and return its result.

     Passes, in order: ``self.kwargs["string"]``, ``RegExps.STYLE_POS`` and
     ``self.kwargs["boolStem"]``.
     """
     settings = self.kwargs
     arguments = (settings["string"], RegExps.STYLE_POS,
                  settings["boolStem"])
     return Util.calc_trigrams(*arguments)
示例#35
0
 def calc_terms(self):
     """Pass ``self.kwargs["string"]`` to ``Util.calc_lazy_POS``.

     Returns the helper's result unchanged.
     """
     text = self.kwargs["string"]
     return Util.calc_lazy_POS(text)
示例#36
0
 def calc_terms(self):
     """Return ``Util.calc_POS_FREELING`` for the configured text.

     The three arguments are read from ``self.kwargs``: ``"string"``,
     ``"pos"`` and ``"regexp"``, in that order.
     """
     options = self.kwargs
     text = options["string"]
     pos = options["pos"]
     pattern = options["regexp"]
     return Util.calc_POS_FREELING(text, pos, pattern)