Пример #1
0
  def do_extract(self):
    """Build every phrase feature and return them together in one list.

    Each feature comes from its own helper object, constructed from
    attributes of ``self``.  Every feature except the context score is
    handed to ``normalize_feature``, which is called only for its effect
    on its argument (its return value is never used here).

    Returns:
      A list in this fixed order: frequency portion, IDF, capitalization
      portion, punctuation portion, MI, KL, context score.
    """
    # Frequency-portion feature.
    freq_feature = FreqPortion(
        self.freq_data, self.selected_doc
    ).compute_freq_portion()
    normalize_feature(freq_feature)

    # NOTE(review): this ContextFreq result is normalized and then never
    # used -- only the ContextScore value computed next is returned.  The
    # calls are kept in place in case they have side effects; confirm
    # whether they can be dropped.
    unused_context_freq = ContextFreq(
        self.freq_data, self.selected_doc, self.selected_context, self.total_cnt
    ).compute_context_freq()
    normalize_feature(unused_context_freq)

    # Context score: returned as computed, without normalization.
    context_score_feature = ContextScore(
        self.freq_data,
        self.selected_doc,
        self.selected_context,
        self.total_cnt,
        self.global_scores,
    ).compute_context_score()

    # IDF feature.
    idf_feature = IDFMeasure(self.freq_data, self.selected_doc).compute_idf()
    normalize_feature(idf_feature)

    # Capitalization-portion feature.
    cap_feature = CapPortion(
        self.freq_data, self.parsed_file, self.selected_doc
    ).compute_cap_portion()
    normalize_feature(cap_feature)

    # Punctuation-portion feature.
    punc_feature = PuncPortion(
        self.freq_data, self.parsed_file, self.selected_doc
    ).compute_punc_portion()
    normalize_feature(punc_feature)

    # MI and KL come from a single call and are normalized separately.
    mi_feature, kl_feature = MIKL(
        self.freq_patterns, self.freq_data, self.selected_doc
    ).compute_mi_and_kl()
    normalize_feature(mi_feature)
    normalize_feature(kl_feature)

    # An experimental ContextFreqDist feature is intentionally not computed.

    return [
        freq_feature,
        idf_feature,
        cap_feature,
        punc_feature,
        mi_feature,
        kl_feature,
        context_score_feature,
    ]
Пример #2
0
  def do_extract(self):
    """Build the phrase features and return them together in one list.

    Each feature comes from its own helper object, constructed from
    attributes of ``self``, and is then handed to ``normalize_feature``.
    That function is called only for its effect on its argument; its
    return value is never used here.

    Returns:
      A list in this fixed order: frequency portion, IDF, capitalization
      portion, punctuation portion, MI, KL.
    """
    # Frequency-portion feature.
    freq_feature = FreqPortion(
        self.freq_data, self.selected_doc
    ).compute_freq_portion()
    normalize_feature(freq_feature)

    # IDF feature.
    idf_feature = IDFMeasure(self.freq_data, self.selected_doc).compute_idf()
    normalize_feature(idf_feature)

    # Capitalization-portion feature.
    cap_feature = CapPortion(
        self.freq_data, self.parsed_file, self.selected_doc
    ).compute_cap_portion()
    normalize_feature(cap_feature)

    # Punctuation-portion feature.
    punc_feature = PuncPortion(
        self.freq_data, self.parsed_file, self.selected_doc
    ).compute_punc_portion()
    normalize_feature(punc_feature)

    # MI and KL come from a single call and are normalized separately.
    mi_feature, kl_feature = MIKL(
        self.freq_patterns, self.freq_data, self.selected_doc
    ).compute_mi_and_kl()
    normalize_feature(mi_feature)
    normalize_feature(kl_feature)

    return [
        freq_feature,
        idf_feature,
        cap_feature,
        punc_feature,
        mi_feature,
        kl_feature,
    ]
Пример #3
0
def calc_punc_portion(doc, freq_data):
  """Print the result of ``PuncPortion.compute_punc_portion()``.

  Args:
    doc: Forwarded as the second argument to ``PuncPortion``.
    freq_data: Forwarded as the first argument to ``PuncPortion``.

  Returns:
    None; the computed value is only written to standard output.
  """
  # NOTE(review): other call sites in this file construct PuncPortion with
  # three arguments (freq_data, parsed_file, selected_doc) -- confirm which
  # constructor signature applies here.
  punc_portion = PuncPortion(freq_data, doc)
  # Parenthesized single-argument form: prints the same text under the
  # Python 2 print statement and is also valid as a Python 3 print() call.
  print(punc_portion.compute_punc_portion())