def do_extract(self):
    """Compute the feature collections for the selected document.

    Each feature comes from a dedicated helper object built from this
    instance's attributes. Every returned feature except the ContextScore
    output is handed to ``normalize_feature``; that function's return value
    is never used here, so it presumably normalises its argument in place
    (confirm against its definition).

    Returns:
        A list holding, in this order, the outputs of FreqPortion,
        IDFMeasure, CapPortion, PuncPortion, MIKL (mi, then kl) and
        ContextScore.
    """
    freq_data = self.freq_data
    doc = self.selected_doc

    freq_feature = FreqPortion(freq_data, doc).compute_freq_portion()
    normalize_feature(freq_feature)

    # NOTE(review): this ContextFreq result is normalised and then never
    # read again -- only the ContextScore output below is returned. The
    # calls are kept so the sequence of operations is unchanged; confirm
    # whether they can be dropped.
    context_freq_feature = ContextFreq(
        freq_data, doc, self.selected_context, self.total_cnt
    ).compute_context_freq()
    normalize_feature(context_freq_feature)

    # The context score is returned as computed: it is not passed to
    # normalize_feature (that call was disabled in the original code).
    context_score_feature = ContextScore(
        freq_data,
        doc,
        self.selected_context,
        self.total_cnt,
        self.global_scores,
    ).compute_context_score()

    idf_feature = IDFMeasure(freq_data, doc).compute_idf()
    normalize_feature(idf_feature)

    cap_feature = CapPortion(
        freq_data, self.parsed_file, doc
    ).compute_cap_portion()
    normalize_feature(cap_feature)

    punc_feature = PuncPortion(
        freq_data, self.parsed_file, doc
    ).compute_punc_portion()
    normalize_feature(punc_feature)

    mi_feature, kl_feature = MIKL(
        self.freq_patterns, freq_data, doc
    ).compute_mi_and_kl()
    normalize_feature(mi_feature)
    normalize_feature(kl_feature)

    # An experimental ContextFreqDist feature (built from freq_data, the
    # selected doc, selected_context and total_cnt) was left disabled in
    # the original and is not computed here.

    return [
        freq_feature,
        idf_feature,
        cap_feature,
        punc_feature,
        mi_feature,
        kl_feature,
        context_score_feature,
    ]
def do_extract(self):
    """Compute the six feature collections for the selected document.

    Each feature comes from a dedicated helper object built from this
    instance's attributes and is then handed to ``normalize_feature``. That
    function's return value is never used here, so it presumably normalises
    its argument in place (confirm against its definition).

    Returns:
        A list holding, in this order, the outputs of FreqPortion,
        IDFMeasure, CapPortion, PuncPortion and MIKL (mi, then kl).
    """
    freq_data = self.freq_data
    doc = self.selected_doc

    freq_feature = FreqPortion(freq_data, doc).compute_freq_portion()
    normalize_feature(freq_feature)

    idf_feature = IDFMeasure(freq_data, doc).compute_idf()
    normalize_feature(idf_feature)

    cap_feature = CapPortion(
        freq_data, self.parsed_file, doc
    ).compute_cap_portion()
    normalize_feature(cap_feature)

    punc_feature = PuncPortion(
        freq_data, self.parsed_file, doc
    ).compute_punc_portion()
    normalize_feature(punc_feature)

    # MIKL yields two features from a single computation.
    mi_feature, kl_feature = MIKL(
        self.freq_patterns, freq_data, doc
    ).compute_mi_and_kl()
    normalize_feature(mi_feature)
    normalize_feature(kl_feature)

    return [
        freq_feature,
        idf_feature,
        cap_feature,
        punc_feature,
        mi_feature,
        kl_feature,
    ]
def calc_punc_portion(doc, freq_data):
    """Print the result of ``PuncPortion.compute_punc_portion`` for *doc*.

    Args:
        doc: Passed as the second argument to ``PuncPortion``.
        freq_data: Passed as the first argument to ``PuncPortion``.

    Returns:
        None. The computed value is written to standard output.
    """
    # NOTE(review): PuncPortion is constructed with two arguments here,
    # while the do_extract call sites pass three
    # (freq_data, parsed_file, selected_doc) -- confirm this matches the
    # constructor's current signature.
    punc_portion = PuncPortion(freq_data, doc)
    # The parenthesised form prints the same text under Python 2's print
    # statement and is also valid Python 3, unlike the bare `print expr`.
    print(punc_portion.compute_punc_portion())