def classify_news():
    """Fit a Naive Bayes model on the labeled news and label the rest.

    Trains on the titles of rows that already have a label, predicts a
    label for every unlabeled row, and renders 'classify_template' with
    the freshly labeled rows.

    NOTE(review): relies on a module-level session `s` — confirm it is
    defined in the surrounding file.
    """
    labeled = s.query(News).filter(News.label != None).all()
    titles = [n.title for n in labeled]
    labels = [n.label for n in labeled]

    classifier = NaiveBayesClassifier()
    classifier.fit(titles, labels)

    # Predict on the unlabeled titles ONLY.  (The original reused the
    # training list here without clearing it, so predictions were made on
    # labeled + unlabeled titles and the per-row assignment below picked
    # up predictions for the *training* titles — every label misaligned.)
    unlabeled = s.query(News).filter(News.label == None).all()
    predictions = classifier.predict([n.title for n in unlabeled])
    for row, label in zip(unlabeled, predictions):
        row.label = label

    return template('classify_template', rows=unlabeled)
def classify_news():
    """Train on labeled news, classify the unlabeled rows, and render them.

    The rendered list is grouped by predicted label in the fixed order
    good -> maybe -> never; rows keep their query order inside each group.

    NOTE(review): predictions outside {'good', 'maybe', 'never'} are
    dropped from the rendered list — this matches the original behavior.
    """
    s = session()

    # Training set: cleaned, lower-cased titles of already-labeled rows.
    labeled = s.query(News).filter(News.label != None).all()
    X = [clean(n.title).lower() for n in labeled]
    y = [n.label for n in labeled]

    model = NaiveBayesClassifier()
    model.fit(X, y)

    # Predict a label for every unlabeled row and persist it on the object.
    no_label = s.query(News).filter(News.label == None).all()
    y_predict = model.predict([clean(n.title).lower() for n in no_label])
    for row, label in zip(no_label, y_predict):
        row.label = label

    # One pass per wanted label keeps the original stable grouping.
    classified_news = [
        row
        for wanted in ('good', 'maybe', 'never')
        for row, label in zip(no_label, y_predict)
        if label == wanted
    ]
    return template('news_recommendations', rows=classified_news)
    '''Parse the data in the file.'''
    # Tail of a parse-file routine — its `def` line is outside this chunk.
    # Builds: the unique vocabulary, one token vector per input line, and
    # the class label of each line (via the project-local parse_line).
    vocabulary, word_vects, classes = [], [], []
    with open(filename, 'r', encoding=ENCODING) as f:
        for line in f:
            if line:
                word_vect, cls = parse_line(line)
                vocabulary.extend(word_vect)   # collect every token seen
                word_vects.append(word_vect)
                classes.append(cls)
    vocabulary = list(set(vocabulary))  # de-duplicate; order not preserved
    return vocabulary, word_vects, classes


if '__main__' == __name__:
    clf = NaiveBayesClassifier()
    vocabulary, word_vects, classes = parse_file('english_big.txt')

    # Training data & test data: hold out a random (1 - TRAIN_PERCENTAGE)
    # fraction by popping random samples out of the full set; popping keeps
    # the two splits disjoint.
    ntest = int(len(classes)*(1-TRAIN_PERCENTAGE))
    test_word_vects = []
    test_classes = []
    for i in range(ntest):
        idx = random.randint(0, len(word_vects)-1)
        test_word_vects.append(word_vects.pop(idx))
        test_classes.append(classes.pop(idx))
    # Whatever was not popped remains as the training set.
    train_word_vects = word_vects
    train_classes = classes
# Toy spam/ham corpus for the Naive Bayes classifier demo; each `result`
# call compares the classifier's probability against the expected value.
SPAM = (
    "offer is secret",
    "click secret link",
    "secret sports link",
)
HAM = (
    "play sports today",
    "went play sports",
    "secret sports event",
    "sports is today",
    "sports costs money",
)

# Single-argument print() calls run unchanged on Python 2 and 3
# (the original py2-only `print x` statements are a SyntaxError on py3).
print("=== Naive Bayes Classifier ===")
c = NaiveBayesClassifier(SPAM, HAM)
print("Size of vocabulary: %d" % c.different_words)
result("SPAM", c.spam.p, 0.3750)
result("secret|SPAM", c.spam.p_word("secret"), 0.3333)
result("secret|HAM", c.ham.p_word("secret"), 0.0667)
result("SPAM|sports", c.p_spam_given_word("sports"), 0.1667)
# Labels fixed: original had an unbalanced trailing ")" in the output text.
result("SPAM|secret is secret", c.p_spam_given_phrase("secret is secret"), 0.9615)
result("SPAM|today is secret", c.p_spam_given_phrase("today is secret"), 0)

print("\n=== Naive Bayes Classifier with Laplace Smoothing ===")
c = NaiveBayesClassifier(SPAM, HAM, 1)   # k=1 Laplace smoothing
result("SPAM", c.spam.p, 0.4)
result("HAM", c.ham.p, 0.6)
result("today|SPAM", c.spam.p_word("today"), 0.0476)
result("today|HAM", c.ham.p_word("today"), 0.1111)
result("SPAM|today is secret", c.p_spam_given_phrase("today is secret"), 0.4858)
n = BayesNetwork(TEST_NET)
P(n, {"B": True}, {"C": True})
P(n, {"C": True}, {"B": True})

# Single-argument print() calls run unchanged on Python 2 and 3
# (the original py2-only `print x` statements are a SyntaxError on py3).
print("\n=== Problem 8 ===")
from bayes import NaiveBayesClassifier, result

# Movie-title corpora: the classifier's "spam" slot plays the OLD class.
SPAM = (
    "Top Gun",
    "Shy People",
    "Top Hat",
)
HAM = (
    "Top Gear",
    "Gun Shy",
)
c = NaiveBayesClassifier(SPAM, HAM, 1)   # k=1 Laplace smoothing
result("OLD", c.spam.p)
result("Top|OLD", c.spam.p_word("Top"))
result("OLD|Top", c.p_spam_given_word("Top"))

print("\n=== Problem 10 ===")
from linear_regression import linear_regression, gaussian

x = [1.0, 3.0, 4.0, 5.0, 9.0]
y = [2.0, 5.2, 6.8, 8.4, 14.8]
(w0, w1), err = linear_regression(x, y)
print("(w0=%.1f, w1=%.1f) err=%.2f" % (w0, w1, err))

print("\n=== Problem 12 ===")
from logic import Proposition, implies
from bayes import NaiveBayesClassifier, result

# Title corpora: the classifier's "spam" slot holds MOVIE titles and the
# "ham" slot holds SONG titles.
MOVIE = (
    "a perfect world",
    "my perfect woman",
    "pretty woman",
)
SONG = (
    "a perfect day",
    "electric storm",
    "another rainy day",
)

# Single-argument print() calls run unchanged on Python 2 and 3
# (the original py2-only `print x` statements are a SyntaxError on py3).
c = NaiveBayesClassifier(MOVIE, SONG, 1)   # k=1 Laplace smoothing
print("Size of vocabulary: %d" % c.different_words)

print("\n=== Homework 3.1 ===")
result("MOVIE", c.spam.p)
result("SONG", c.ham.p)
result("perfect|MOVIE", c.spam.p_word("perfect"))
result("perfect|SONG", c.ham.p_word("perfect"))
result("storm|MOVIE", c.spam.p_word("storm"))
result("storm|SONG", c.ham.p_word("storm"))

print("\n=== Homework 3.2 ===")
# Label fixed: original had an unbalanced trailing ")" in the output text.
result("MOVIE|perfect storm", c.p_spam_given_phrase("perfect storm"))

print("\n=== Homework 3.3 ===")
c = NaiveBayesClassifier(MOVIE, SONG)   # no smoothing this time
result("MOVIE|perfect storm", c.p_spam_given_phrase("perfect storm"))

print("\n=== Homework 3.4 ===")