def fit(self, cleaned_data, terms, target, stopwords, idf, weight=None):
    """Store the training inputs and derive per-term likelihoods and priors.

    Args:
        cleaned_data: Training documents. Only ``len(cleaned_data)`` is
            used directly here; the collection is also handed to
            ``Weighting`` when ``weight`` is omitted.
        terms: Vocabulary terms. Iterated several times, so it must be
            re-iterable (e.g. a list).
        target: Stored on ``self.target``. Presumably the class labels of
            the training documents -- confirm against the helper methods.
        stopwords: Stored on ``self.stopwords``.
        idf: Stored on ``self.idf``.
        weight: Optional precomputed term weights, indexed as
            ``weight[term][document_index]``. When ``None`` the weights
            come from ``Weighting(...).get_tf_idf_weighting()``.

    After the call the instance holds:
        ``total``: one summed weight per document index.
        ``con_prob_negative`` / ``con_prob_neutral`` / ``con_prob_positive``:
            one value per term, in ``terms`` order, as returned by
            ``calculate_probability_multinomial``.
        ``likelihood``: ``{term: [negative, neutral, positive]}``.
        ``prior_negative`` / ``prior_neutral`` / ``prior_positive``:
            category document count divided by the total document count.

    Raises:
        ZeroDivisionError: If ``self.getTotalDocument()`` returns 0.
    """
    self.cleaned_data = cleaned_data
    self.terms = terms
    self.target = target

    # Identity check on purpose: ``weight == None`` is evaluated
    # element-wise by array-like containers (e.g. a pandas DataFrame),
    # and the result has no single truth value for ``if``.
    if weight is None:
        weighting = Weighting(self.cleaned_data, self.terms)
        self.weighted_terms = weighting.get_tf_idf_weighting()
    else:
        self.weighted_terms = weight

    self.idf = idf
    self.stopwords = stopwords

    # Rebuild the accumulators instead of appending to whatever is already
    # there, so a repeated fit() cannot pair new terms with stale values.
    self.total = [
        sum(self.weighted_terms[term][doc_index] for term in self.terms)
        for doc_index in range(len(self.cleaned_data))
    ]

    self.con_prob_negative = []
    self.con_prob_neutral = []
    self.con_prob_positive = []
    # The per-document totals above are complete before any conditional
    # probability is requested, matching the original call order.
    for term in self.terms:
        self.con_prob_negative.append(
            self.calculate_probability_multinomial(term, 'Negatif'))
        self.con_prob_neutral.append(
            self.calculate_probability_multinomial(term, 'Netral'))
        self.con_prob_positive.append(
            self.calculate_probability_multinomial(term, 'Positif'))

    # Each entry is ordered [negative, neutral, positive].
    self.likelihood = {
        term: [negative, neutral, positive]
        for term, negative, neutral, positive in zip(
            self.terms,
            self.con_prob_negative,
            self.con_prob_neutral,
            self.con_prob_positive,
        )
    }

    total_documents = self.getTotalDocument()
    self.prior_negative = (
        self.getTotalDocumentWithSpecificCategory('Negatif') / total_documents)
    self.prior_neutral = (
        self.getTotalDocumentWithSpecificCategory('Netral') / total_documents)
    self.prior_positive = (
        self.getTotalDocumentWithSpecificCategory('Positif') / total_documents)
result_data_file.close() result_data_file = open(nt_filename, 'rb') new_terms = pickle.load(result_data_file) result_data_file.close() result_data_file = open(rw_filename, 'rb') removed_words = pickle.load(result_data_file) result_data_file.close() result_data_file = open(stopwords_filename, 'rb') stopwords = pickle.load(result_data_file) result_data_file.close() weight = Weighting(new_cleaned_data, new_terms) tfidf = weight.get_tf_idf_weighting() idf = weight.get_idf() nb = NBMultinomial() nb.fit(new_cleaned_data, new_terms, data_train[i]["target"], stopwords, idf, tfidf) def predict(input_tweet): prediction, negatif, netral, positif, used_terms = nb.predict(input_tweet) result_dict = {} result_dict["success"] = True result_dict["type"] = "TBRS" result_dict["data"] = { "prediction": prediction, "negatif": negatif,