def calc_terms(self):
    """Return the result of ``Util.calc_local_regexp`` for the configured text.

    The pattern is resolved from ``self.kwargs`` through
    ``Util.get_the_regexp``; the text and the third argument come from the
    ``"string"`` and ``"local_k"`` entries of ``self.kwargs``.
    """
    options = self.kwargs
    pattern = Util.get_the_regexp(options)
    text, local_k = options["string"], options["local_k"]
    return Util.calc_local_regexp(text, pattern, local_k)
def calc_terms(self):
    """Remove the ignored terms from the text, then extract regexp terms.

    Each line of the file at ``self.kwargs["path_ignored_terms"]`` is
    stripped and lower-cased, and every occurrence of it is removed from
    ``self.kwargs["string"]`` (the entry is overwritten after each term).
    The cleaned text is then passed to ``Util.calc_regexp`` together with
    the pattern resolved by ``Util.get_the_regexp``.

    Returns:
        Whatever ``Util.calc_regexp`` returns for the cleaned text.
    """
    # ``with`` guarantees the file is closed even when a replacement or the
    # diagnostic print raises part-way through the loop.
    with open(self.kwargs["path_ignored_terms"]) as ignored_terms_file:
        for line in ignored_terms_file:
            term = line.strip().lower()  # normalise once, reuse below
            self.kwargs["string"] = self.kwargs["string"].replace(term, "")
            # Diagnostic echo of each ignored term; a single parenthesised
            # argument prints the same under the print statement and the
            # print function.
            print(term)

    the_regexp = Util.get_the_regexp(self.kwargs)
    return Util.calc_regexp(self.kwargs["string"], the_regexp)
def calc_terms(self):
    """Build the ambiguous-word value once, otherwise extract ambiguous words.

    While ``self.kwargs["boolBuildSetGlobal"]`` is truthy, the call stores the
    result of ``Util.calc_ambiguous_words_set`` under ``"setAmbiguous"``,
    sets the flag to ``False``, sets ``"mode"`` to
    ``EnumModes.MODE_CORPUS_POS`` and returns the stored value.  Otherwise it
    returns the result of ``Util.calc_ambiguous_words`` for the text and the
    stored ``"setAmbiguous"`` value.  Diagnostic banners are printed on
    every call.
    """
    def show_state(rule):
        # Banner: separator, self.name, current flag value, separator.
        print(rule)
        print(self.name)
        print(self.kwargs["boolBuildSetGlobal"])
        print(rule)

    show_state("#####################")
    if not self.kwargs["boolBuildSetGlobal"]:
        return Util.calc_ambiguous_words(self.kwargs["string"], self.kwargs["setAmbiguous"])

    show_state("++++++++++++++++++++++#####################")
    ambiguous = Util.calc_ambiguous_words_set(self.kwargs["string"])
    self.kwargs["setAmbiguous"] = ambiguous
    self.kwargs["boolBuildSetGlobal"] = False
    self.kwargs["mode"] = EnumModes.MODE_CORPUS_POS  # MODE_CORPUS_POS
    return ambiguous
def calc_terms(self):
    """Collect group 1 of every token that matches the tag pattern.

    The text in ``self.kwargs["string"]`` is split with ``Util.calc_split``.
    Each token is tested with ``re.match`` against the pattern resolved by
    ``Util.get_the_regexp`` when ``self.kwargs`` has a ``"regexp"`` entry,
    or against the default ``".+_(.+)"`` otherwise.

    Returns:
        list: ``match.group(1)`` for every matching token, in token order;
        tokens that do not match are skipped.
    """
    the_regexp = ".+_(.+)"
    if "regexp" in self.kwargs:
        the_regexp = Util.get_the_regexp(self.kwargs)

    tokens = Util.calc_split(self.kwargs["string"])
    # Lazy generator keeps the original one-token-at-a-time matching order.
    matches = (re.match(the_regexp, token) for token in tokens)
    # ``is not None`` is the identity test re.match results call for.
    return [match.group(1) for match in matches if match is not None]
def calc_terms(self):
    """Return the ambiguous-word value on the build pass, tokens afterwards.

    When ``self.kwargs["boolBuildSetGlobal"]`` is truthy the result of
    ``Util.calc_ambiguous_words_set`` is stored under ``"setAmbiguous"``,
    the flag is set to ``False``, ``"mode"`` becomes
    ``EnumModes.MODE_CORPUS_POS`` and the stored value is returned.
    Otherwise the result of ``Util.calc_ambiguous_words`` is returned.
    Diagnostic banners are printed in both cases.
    """
    opts = self.kwargs

    rule = "#####################"
    print(rule)
    print(self.name)
    print(opts["boolBuildSetGlobal"])
    print(rule)

    if opts["boolBuildSetGlobal"]:
        rule = "++++++++++++++++++++++#####################"
        print(rule)
        print(self.name)
        print(opts["boolBuildSetGlobal"])
        print(rule)
        opts["setAmbiguous"] = Util.calc_ambiguous_words_set(opts['string'])
        opts["boolBuildSetGlobal"] = False
        opts["mode"] = EnumModes.MODE_CORPUS_POS  # MODE_CORPUS_POS
        result = opts["setAmbiguous"]
    else:
        result = Util.calc_ambiguous_words(opts['string'], opts['setAmbiguous'])
    return result
def calc_terms(self, kwargs, f_src):
    """Compute SFM terms for one file of the ``c50_term_SFM_23`` corpus.

    ``kwargs["corpus"]`` is temporarily replaced by a ``LazyCorpusLoader``
    rooted at ``"c50_term_SFM_23/<suffix>"``, where ``<suffix>`` is the part
    of the original corpus root path captured after the c50 prefix.  The raw
    text of ``f_src`` in that corpus is passed to ``Util.calc_SFM``.

    Args:
        kwargs: mapping whose ``"corpus"`` entry exposes ``root.path``.
        f_src: file id read from the replacement corpus.

    Returns:
        Whatever ``Util.calc_SFM`` returns for the raw text.
    """
    # save the original corpus
    original_corpus = kwargs["corpus"]

    # NOTE(review): the prefix is hard-coded to a single nltk_data location;
    # for any other root re.match returns None and ``.group(1)`` below fails
    # with AttributeError -- confirm whether this should be configurable.
    groups = re.match(r'/home/aplm/nltk_data/corpora/c50/(.+)', original_corpus.root.path)
    kwargs["corpus"] = LazyCorpusLoader(
        "c50_term_SFM_23/" + groups.group(1),
        CategorizedPlaintextCorpusReader,
        r'.+/.+',
        cat_pattern=r'(.+)/.+')
    try:
        return Util.calc_SFM(kwargs["corpus"].raw(fileids=[f_src]))
    finally:
        # restore the original corpus, even when reading or extraction fails
        kwargs["corpus"] = original_corpus
def calc_terms(self):
    """Return ``Util.calc_sent_nostopwords_lenght`` for the configured text.

    The text is ``self.kwargs["string"]``; the second argument is the fixed
    ``RegExps.W_H_C``.
    """
    text = self.kwargs["string"]
    return Util.calc_sent_nostopwords_lenght(text, RegExps.W_H_C)
def calc_terms(self):
    """Delegate to ``Util.calc_token_lenght``.

    The arguments, in order, are the ``"string"``, ``"regexp"`` and
    ``"template"`` entries of ``self.kwargs``.
    """
    opts = self.kwargs
    text = opts["string"]
    pattern = opts["regexp"]
    template = opts["template"]  # "word{len:%s}"
    return Util.calc_token_lenght(text, pattern, template)
def calc_terms(self):
    """Return ``Util.calc_ngrams_g`` for the configured text.

    The pattern comes from ``Util.get_the_regexp(self.kwargs)``; the text and
    the third argument come from the ``"string"`` and ``"nlen"`` entries.
    """
    settings = self.kwargs
    pattern = Util.get_the_regexp(settings)
    text = settings["string"]
    nlen = settings["nlen"]
    return Util.calc_ngrams_g(text, pattern, nlen)
def calc_terms(self):
    """Delegate to ``Util.calc_local_ngrams``.

    The arguments, in order, are the ``"string"``, ``"nlen"`` and
    ``"local_k"`` entries of ``self.kwargs``.
    """
    # Entries are looked up left to right, in call-argument order.
    text, nlen, local_k = (
        self.kwargs[key] for key in ("string", "nlen", "local_k")
    )
    return Util.calc_local_ngrams(text, nlen, local_k)
def calc_collocation(self):
    """Delegate to ``Util.calc_trigram_collocation``.

    The text, stem flag and collocation value come from the ``"string"``,
    ``"boolStem"`` and ``"setCollocations"`` entries of ``self.kwargs``; the
    pattern is the fixed ``RegExps.STOPW_PUNTC``.
    """
    opts = self.kwargs
    return Util.calc_trigram_collocation(
        opts["string"],
        RegExps.STOPW_PUNTC,
        opts["boolStem"],
        opts["setCollocations"])
def calc_terms(self):
    """Return ``self.calc_ngrams_g`` applied to the split text.

    ``Util.calc_split`` is applied to ``self.kwargs["string"]`` first; its
    result and ``self.kwargs["nlen"]`` are then passed to
    ``self.calc_ngrams_g``.
    """
    tokens = Util.calc_split(self.kwargs["string"])
    nlen = self.kwargs["nlen"]
    return self.calc_ngrams_g(tokens, nlen)
def calc_terms(self):
    """Return ``Util.calc_split`` applied to ``self.kwargs["string"]``."""
    text = self.kwargs["string"]
    return Util.calc_split(text)
def calc_terms(self):
    """Return ``Util.calc_word_pair_opcion`` for the configured text.

    The pattern comes from ``Util.get_the_regexp(self.kwargs)``; the text and
    the option come from the ``"string"`` and ``"opcion"`` entries.
    """
    opts = self.kwargs
    pattern = Util.get_the_regexp(opts)
    text = opts["string"]
    opcion = opts["opcion"]
    return Util.calc_word_pair_opcion(text, pattern, opcion)
def calc_terms(self):
    """Return ``Util.calc_markers`` for the configured text.

    The pattern comes from ``Util.get_the_regexp(self.kwargs)``; the text and
    the marker type come from the ``"string"`` and ``"type_marker"`` entries.
    """
    config = self.kwargs
    pattern = Util.get_the_regexp(config)
    call_args = (config["string"], pattern, config["type_marker"])
    return Util.calc_markers(*call_args)
def calc_terms(self):
    """Delegate to ``Util.calc_trigrams`` with the ``RegExps.STYLE_POS`` pattern.

    The text and the stem flag come from the ``"string"`` and ``"boolStem"``
    entries of ``self.kwargs``.
    """
    opts = self.kwargs
    text = opts["string"]
    stem_flag = opts["boolStem"]
    return Util.calc_trigrams(text, RegExps.STYLE_POS, stem_flag)
def calc_collocation_set(self):
    """Delegate to ``Util.calc_trigram_collocation_set``.

    The text and the stem flag come from the ``"string"`` and ``"boolStem"``
    entries of ``self.kwargs``; the pattern is the fixed ``RegExps.STOPW``.
    """
    opts = self.kwargs
    text = opts["string"]
    stem_flag = opts["boolStem"]
    return Util.calc_trigram_collocation_set(text, RegExps.STOPW, stem_flag)
def calc_terms(self):
    """Return ``Util.calc_trigrams`` for the text and the resolved pattern.

    The pattern comes from ``Util.get_the_regexp(self.kwargs)`` and is
    resolved before ``self.kwargs["string"]`` is read.
    """
    opts = self.kwargs
    pattern = Util.get_the_regexp(opts)
    text = opts["string"]
    return Util.calc_trigrams(text, pattern)
def calc_terms(self):
    """Return ``Util.calc_lazy_POS`` applied to ``self.kwargs["string"]``."""
    return Util.calc_lazy_POS(self.kwargs["string"])
def calc_collocation(self):
    """Delegate to ``Util.calc_trigram_collocation``.

    All four arguments come from ``self.kwargs``, in this order:
    ``"string"``, ``"regexp"``, ``"boolStem"``, ``"setCollocations"``.
    """
    opts = self.kwargs
    text = opts["string"]
    pattern = opts["regexp"]
    stem_flag = opts["boolStem"]
    collocations = opts["setCollocations"]
    return Util.calc_trigram_collocation(text, pattern, stem_flag, collocations)
def calc_terms(self):
    """Return ``Util.calc_token_lenght`` for the configured text.

    The ``"string"``, ``"regexp"`` and ``"template"`` entries of
    ``self.kwargs`` are passed positionally, in that order.
    """
    opts = self.kwargs
    # Template example from the original note: "word{len:%s}"
    call_args = (opts["string"], opts["regexp"], opts["template"])
    return Util.calc_token_lenght(*call_args)
def calc_terms(self):
    """Delegate to ``Util.calc_sent_lenght``.

    The arguments, in order, are the ``"string"``, ``"regexp"`` and
    ``"template"`` entries of ``self.kwargs``.
    """
    opts = self.kwargs
    text = opts["string"]
    pattern = opts["regexp"]
    template = opts["template"]  # "sentToken{len:%s}"
    return Util.calc_sent_lenght(text, pattern, template)
def calc_terms(self):
    """Return ``Util.calc_sent_lenght`` for the configured text.

    The ``"string"``, ``"regexp"`` and ``"template"`` entries of
    ``self.kwargs`` are passed positionally, in that order.
    """
    # Template example from the original note: "sentToken{len:%s}"
    text, pattern, template = (
        self.kwargs[key] for key in ("string", "regexp", "template")
    )
    return Util.calc_sent_lenght(text, pattern, template)
def calc_terms(self):
    """Delegate to ``Util.calc_bigrams`` with the ``RegExps.STOPW`` pattern.

    The text and the stem flag come from the ``"string"`` and ``"boolStem"``
    entries of ``self.kwargs``.
    """
    opts = self.kwargs
    text = opts["string"]
    stem_flag = opts["boolStem"]
    return Util.calc_bigrams(text, RegExps.STOPW, stem_flag)
def calc_terms(self):
    """Pass ``self.kwargs["string"]`` to ``Util.calc_lazy_POS`` and return the result."""
    text = self.kwargs["string"]
    return Util.calc_lazy_POS(text)
def calc_terms(self):
    """Return ``Util.calc_POS_FREELING`` for the configured text.

    The arguments, in order, are the ``"string"``, ``"pos"`` and ``"regexp"``
    entries of ``self.kwargs``.
    """
    opts = self.kwargs
    text = opts["string"]
    pos = opts["pos"]
    pattern = opts["regexp"]
    return Util.calc_POS_FREELING(text, pos, pattern)