def Build_TF_IDF(self):
    """Accumulate per-class word statistics over the training mails.

    For each index ``i`` in ``range(self.Total_Mails)`` the mail
    ``self.mails[i]`` is tokenised with ``ProcessData.Process_Msg`` and,
    depending on the truthiness of ``self.labels[i]``, one pair of tables
    is updated in place:

    * truthy label -> ``self.TF_Spam`` / ``self.IDF_Spam``
    * falsy label  -> ``self.TF_Ham`` / ``self.IDF_Ham``

    The ``TF_*`` table gains 1 for every occurrence of a word; the
    ``IDF_*`` table gains 1 per mail for every distinct word in that mail
    (i.e. it stores a raw document count, not a logarithmic IDF value).

    Returns nothing; all four tables must already exist as dict-like
    objects on ``self``.
    """
    for i in range(self.Total_Mails):
        # NOTE(review): Process_Msg is assumed to yield hashable tokens
        # (they are used as dict keys below) -- confirm against ProcessData.
        words = ProcessData.Process_Msg(self.mails[i])

        # The label is constant for the whole mail, so choose the target
        # tables once instead of re-testing it for every word.
        if self.labels[i]:
            tf_table, idf_table = self.TF_Spam, self.IDF_Spam
        else:
            tf_table, idf_table = self.TF_Ham, self.IDF_Ham

        # A dict serves as an insertion-ordered set: O(1) membership
        # instead of scanning a list, while keeping first-occurrence order
        # so the IDF tables receive their keys in the same order as before.
        distinct_words = {}
        for word in words:
            tf_table[word] = tf_table.get(word, 0) + 1
            distinct_words[word] = None

        for word in distinct_words:
            idf_table[word] = idf_table.get(word, 0) + 1
def Predict(self, test_data):
    """Classify every message in ``test_data``.

    Each message is tokenised with ``ProcessData.Process_Msg`` and passed
    to ``self.Classify``; the classifier's result is converted with
    ``int``.

    Args:
        test_data: iterable of raw messages.

    Returns:
        dict mapping each message's zero-based position in ``test_data``
        to the integer classification result.
    """
    return {
        position: int(self.Classify(ProcessData.Process_Msg(raw_text)))
        for position, raw_text in enumerate(test_data)
    }