def recomendations():
    """Train a Naive Bayes model on the labeled news, report a hold-out
    accuracy score, and render predictions for the unlabeled news.

    Fixes over the original: rows were fetched one-by-one with ``.get()``
    (N queries), the first fetched row was silently skipped
    (``range(1, length)``), unlabeled rows (label=None) leaked into the
    training targets, and prediction ran on raw titles although the model
    was fit on cleaned, lower-cased ones.
    """
    s = session()

    # Train only on rows that actually carry a label, in one query.
    labeled = s.query(News).filter(News.label != None).all()
    X = [clean(row.title).lower() for row in labeled]
    y = [row.label for row in labeled]

    # Hold out the last 30% to estimate accuracy.
    model = NaiveBayesClassifier()
    part = len(X) * 7 // 10
    model.fit(X[:part], y[:part])
    score = model.score(X[part:], y[part:])

    # Refit on the full labeled set for the actual recommendations.
    rec = NaiveBayesClassifier(alpha=0.01)
    rec.fit(X, y)

    rows = s.query(News).filter(News.label == None).all()
    # Same preprocessing at predict time as at fit time.
    classified_news = rec.predict([clean(row.title).lower() for row in rows])
    return template('news_recomendations', rows=classified_news, score=score)
def classify_news():
    """Fit on labeled news and render the unlabeled news grouped in
    good / maybe / never order.

    The original re-queried the database once per predicted id (N extra
    SELECTs); the rows are already loaded, so group them directly.
    """
    s = session()
    marked_news = s.query(News).filter(News.label != None).all()
    X_train = [new.title for new in marked_news]
    y_train = [new.label for new in marked_news]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X_train, y_train)

    news = s.query(News).filter(News.label == None).all()
    predicts = model.predict([new.title for new in news])

    # Bucket the already-loaded rows by predicted label, preserving the
    # original prediction order inside each bucket.
    classified_news = {'good': [], 'maybe': [], 'never': []}
    for row, predict in zip(news, predicts):
        classified_news[predict].append(row)

    rows = []
    for label in ['good', 'maybe', 'never']:
        rows.extend(classified_news[label])
    return template('classification_template', rows=rows)
def classify_news():
    """Predict labels for the unlabeled news and render them grouped by
    predicted label (good / maybe / never)."""
    s = session()

    # Only title and label are needed for training.
    train_news = s.query(News).filter(News.label != None).options(
        load_only("title", "label")).all()
    classifier = NaiveBayesClassifier()
    classifier.fit([row.title for row in train_news],
                   [row.label for row in train_news])

    test_news = s.query(News).filter(News.label == None).all()
    labels = classifier.predict([row.title for row in test_news])

    # Partition rows by predicted label.
    paired = list(zip(test_news, labels))
    good = [row for row, label in paired if label == 'good']
    maybe = [row for row, label in paired if label == 'maybe']
    never = [row for row, label in paired if label == 'never']

    return template('recommendations_template', {
        'good': good,
        'never': never,
        'maybe': maybe
    })
def recommendations():
    """Rank the unlabeled news with a model trained on the labeled rows
    and render the ranked table."""
    TEMPLATE_PATH.insert(0, '')
    s = session()

    # 1. Train on labeled news.
    rows = s.query(News).filter(News.label != None).all()
    X, y = [], []
    for row in rows:
        X.append(row.title)
        y.append(row.label)
    X = [clean(x).lower() for x in X]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    # 2. Get unlabeled news.
    new_rows = s.query(News).filter(News.label == None).all()

    # 3. Get predictions. The original called predict(row.title.split()) —
    # a list of raw words — although the model was fit on cleaned,
    # lower-cased whole titles; use the same preprocessing here.
    marked = []
    for row in new_rows:
        marked.append((model.predict([clean(row.title).lower()]), row))

    # 4. Print ranked table.
    return template('news_ranked', rows=marked)
def classify_news():
    """Group unlabeled news into good / maybe / never buckets with a model
    trained on the first 1000 labeled rows.

    Fix: the original overwrote ``row.title`` with the cleaned text, so the
    template displayed mangled titles and the change risked being flushed
    back to the database; clean into a local variable instead.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).filter(
        News.id < 1001).all()
    x = [del_stops(clean(news.title)) for news in labeled_news]
    y = [news.label for news in labeled_news]
    classifier = NaiveBayesClassifier(1)
    classifier.fit(x, y)

    rows = s.query(News).filter(News.label == None).all()
    good, maybe, never = [], [], []
    for row in rows:
        # Same clean + stop-word removal as at fit time, without mutating
        # the ORM object.
        title = del_stops(clean(row.title))
        prediction = classifier.predict([title])
        if prediction == ['good']:
            good.append(row)
        elif prediction == ['maybe']:
            maybe.append(row)
        else:
            never.append(row)
    return template('news_recs', good=good, maybe=maybe, never=never)
def main():
    """Read the data splits, build the vectorisers and classifiers, train
    whatever is not cached, and evaluate every classifier on the test set."""
    train_set, validation_set, test_set = read_data()
    print(" TRAINING SET SIZE: {}".format(len(train_set)))
    print("VALIDATION SET SIZE: {}".format(len(validation_set)))
    print(" TESTING SET SIZE: {}\n".format(len(test_set)))

    print("FITTING tf-idf vectoriser")
    tfidf = TfIdfVectoriser(train_set,
                            use_idf=False,
                            df_range=(0.1, 1.0),
                            ngram_range=(1, 2))

    print("LOADING Word2Vec vectoriser with max sequence length {}".format(
        MAX_SEQUENCE_LENGTH))
    w2v_slim = Word2VecVectoriser(MAX_SEQUENCE_LENGTH)

    print("INITIALISING CLASSIFIERS")
    dropout, learning_rate, batch_size, layer_size = 0.2, 0.005, 32, 8
    # The two deep models share every hyper-parameter except the LSTM flag.
    deep_args = (layer_size, batch_size, learning_rate, dropout, USE_CACHE)
    lstm = DeepClassifier(w2v_slim, True, *deep_args)
    rnn = DeepClassifier(w2v_slim, False, *deep_args)
    bayes = NaiveBayesClassifier(tfidf)

    print("TESTING CLASSIFIERS")
    for classifier in (bayes, lstm, rnn):
        # Naive bayes is not cached so must always be trained
        if not USE_CACHE or isinstance(classifier, NaiveBayesClassifier):
            print(' TRAINING {}'.format(classifier), flush=True)
            classifier.train(train_set, validation_set=validation_set)
        print(' TESTING {}'.format(classifier), flush=True)
        evaluate(classifier, test_set)
def classify_news():
    """Label all unlabeled news with a pickled Naive Bayes model, persist
    the labels, and render every labeled row ordered by label.

    Raises:
        ValueError: if the trained model pickle does not exist yet.
    """
    sess = session()
    unclassified: tp.List[tp.Tuple[int, str]] = [
        (i.id, stemmer.clear(i.title))
        for i in sess.query(News).filter(News.label == None).all()
    ]
    x1 = [i[1] for i in unclassified]

    # Build the model path once instead of duplicating the f-string.
    model_path = pathlib.Path(
        f"{os.path.dirname(os.path.realpath(__file__))}/../model/model.pickle")
    if not model_path.is_file():
        raise ValueError(
            "Classifier is untrained! Please mark enough news to adequately train the model and run bayes.py to save it."
        )
    with open(model_path, "rb") as model_file:
        # The original constructed NaiveBayesClassifier(alpha=0.1) here and
        # immediately discarded it — the pickle contains the whole model.
        model = pickle.load(model_file)

    labels = model.predict(x1)
    for i, e in enumerate(unclassified):
        extract = sess.query(News).filter(News.id == e[0]).first()
        extract.label = labels[i]
    sess.commit()
    rows = sess.query(News).filter(News.label != None).order_by(
        News.label).all()
    return template("classified_template.tpl", rows=rows)
def classify_news():
    """Predict labels for the unlabeled news and render them sorted by
    label (alphabetical: good, maybe, never)."""
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()
    print('Received info from database')

    # Fit the classifier on cleaned, lower-cased titles.
    model = NaiveBayesClassifier(alpha=1)
    model.fit([clean(news.title).lower() for news in training_rows],
              [news.label for news in training_rows])
    print('Fitted the classifier')

    predicted_labels = model.predict([news.title for news in rows])
    print('labels predicted')

    # Attach the predictions to the rows, then order by the label string.
    for news, label in zip(rows, predicted_labels):
        news.label = label
    classified_news = sorted(rows, key=lambda news: news.label)
    print('news sorted')
    return template('C:\cs102\homework06\\news_recommendations.tpl',
                    rows=classified_news)
def classify_news():
    """Rank unlabeled news by predicted label (good=0, maybe=1, never=2)
    and render them best-first.

    Fix: the original compared ``result[i][1]`` — the second *character* of
    the predicted label string ('o', 'a', 'e') — against whole words like
    "good", which could never match, so every row fell through to rank 2.
    """
    s = session()
    rows_teach = s.query(News).filter(News.label != None).all()
    rows_test = s.query(News).filter(News.label == None).all()

    # Clean both corpora identically before fit/predict.
    X_train = [clean(el.title) for el in rows_teach]
    y_train = [el.label for el in rows_teach]
    X = [clean(el.title) for el in rows_test]

    model = NaiveBayesClassifier(alpha=0.05)
    model.fit(X_train, y_train)
    result = model.predict(X)

    # Map each predicted label to a sort rank; anything unexpected sinks
    # to the bottom, matching the original else-branch.
    rank = {"good": 0, "maybe": 1}
    for i, row in enumerate(rows_test):
        row.result = rank.get(result[i], 2)
    rows_test.sort(key=lambda x: x.result)
    return template('news_template', rows=rows_test)
def recommendations():
    """Label every unlabeled news row with a model trained on the labeled
    rows, commit the labels, and render only the rows predicted 'good'.

    NOTE(review): relies on a session ``s`` defined outside this function —
    confirm it exists at module level.
    """
    title_unclassified = []
    title_classified = []
    label_list = []

    # 1. Unlabeled news to be classified.
    unclassified_news = s.query(News).filter(News.label.is_(None)).all()
    for news in unclassified_news:
        title_unclassified.append(news.title)

    # 2. Labeled news provide the training corpus.
    classified_news = s.query(News).filter(News.label.isnot(None)).all()
    for news in classified_news:
        title_classified.append(news.title)
        label_list.append(news.label)

    model = NaiveBayesClassifier(alpha=1)
    model.fit(title_classified, label_list)
    label_classified = model.predict(title_unclassified)

    # 3. Persist the predicted labels, then show the 'good' rows.
    for news, label in zip(unclassified_news, label_classified):
        news.label = label
    s.commit()
    classified_news = s.query(News).filter(News.label == 'good').all()
    return template('recommendations_template', rows=classified_news)
def classify_news():
    """Predict labels for the unlabeled news, attach them to the rows, and
    render only the rows judged 'good'."""
    s = session()

    # Train on everything that already carries a label.
    labeled = s.query(News).filter(News.label != None).all()
    X = [clean(item.title).lower() for item in labeled]
    y = [item.label for item in labeled]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    # Predict for the unlabeled rows with the same preprocessing.
    no_label = s.query(News).filter(News.label == None).all()
    X_p = [clean(item.title).lower() for item in no_label]
    y_predict = model.predict(X_p)

    # Attach predictions; keep only the 'good' ones for display.
    classified_news = []
    for row, label in zip(no_label, y_predict):
        row.label = label
        if label == 'good':
            classified_news.append(row)
    return template('news_recommendations', rows=classified_news)
def classify_news():
    """Train on news ids up to 1000 and render ranked predictions for the
    remaining rows.

    Fixes over the original: ``Query.get()`` takes a primary key, not a
    boolean expression (``get(News.id == i)`` is a misuse); and the output
    paired test row ``i`` with *training* label ``y[i]`` instead of the
    model's prediction, mislabeling every row.
    """
    X, y, info = [], [], []
    s = session()

    # Training rows: ids 0..1000.
    for i in range(1001):
        for item in s.query(News).filter(News.id == i).all():
            X.append(item.title)
            y.append(item.label)

    # Remaining rows are the ones to classify; keep display metadata.
    X_test = []
    for i in range(1001, len(s.query(News).all()) + 1):
        for item in s.query(News).filter(News.id == i).all():
            X_test.append(item.title)
            info.append(News(author=item.author,
                             points=item.points,
                             comments=item.comments,
                             url=item.url))

    # Strip punctuation and lower-case both corpora identically.
    strip = str.maketrans("", "", string.punctuation)
    X = [x.translate(strip).lower() for x in X]
    X_cleared = [x.translate(strip).lower() for x in X_test]

    model = NaiveBayesClassifier(alpha=0.01)
    model.fit(X, y)
    predicted_news = model.predict(X_cleared)

    # Pair each *prediction* with its raw title and metadata.
    classified_news = []
    for i in range(len(predicted_news)):
        classified_news.append([predicted_news[i], X_test[i], info[i]])
    classified_news = sorted(classified_news, key=lambda item: item[0])
    return template('homework06/news_recommendations', rows=classified_news)
def create_model():
    """Fit a Naive Bayes classifier on the labeled news and render the
    fitted labels and model via the news_model template."""
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()
    x_train = [clean(news.title) for news in labeled_news]
    y_train = [news.label for news in labeled_news]
    classifier = NaiveBayesClassifier(0.05)
    # fit() returns a (labels, model) pair here, which the template consumes.
    labels, model = classifier.fit(x_train, y_train)
    return template("news_model", labels=labels, model=model)
def classify_news(train_titles, train_labels, test_titles):
    """Fit a Naive Bayes classifier on the training titles and labels, then
    return the predicted labels for the test titles."""
    classifier = NaiveBayesClassifier()
    classifier.fit(train_titles, train_labels)
    return classifier.predict(test_titles)
def classify_news():
    """Render the unlabeled news with a color keyed by predicted label.

    Fix: the model is fit on ``clean(title).lower()`` but the original
    predicted on the raw title, so prediction saw tokens the model was
    never trained on; apply the same preprocessing on both sides.
    """
    s = get_session(engine)
    model = NaiveBayesClassifier()
    train_set = s.query(News).filter(News.label != None).all()
    model.fit([clean(news.title).lower() for news in train_set],
              [news.label for news in train_set])
    test = s.query(News).filter(News.label == None).all()
    # One prediction per row, same preprocessing as at fit time.
    cell = [model.predict(clean(news.title).lower()) for news in test]
    return template("color_template",
                    rows=[(news, colors[cell[i]])
                          for i, news in enumerate(test)])
def classify_news():
    """Build and return a Naive Bayes classifier constructed from the
    titles and labels of all labeled news rows."""
    s = session()
    news = s.query(News).filter(News.label != None).all()
    # This project's classifier takes the training data in its constructor.
    classificator = NaiveBayesClassifier([new.title for new in news],
                                         [new.label for new in news])
    return classificator
def classify_news():
    """Render the unlabeled news sorted by a key derived from each row's
    predicted label."""
    s = session()
    model = NaiveBayesClassifier()
    train = s.query(News).filter(News.label != None).all()
    model.fit([clean(news.title).lower() for news in train],
              [news.label for news in train])
    test = s.query(News).filter(News.label == None).all()

    def sort_key(news):
        # get_l turns the prediction into a sortable key.
        return get_l(model.predict(clean(news.title).lower()))

    return template("news_template", rows=sorted(test, key=sort_key))
def classify_news():
    """Predict labels for the unlabeled news and render (news, label)
    pairs."""
    s = session()
    fitnews = s.query(News).filter(News.label != None).all()
    testnews = s.query(News).filter(News.label == None).all()

    X_fit = [item.title for item in fitnews]
    y_fit = [item.label for item in fitnews]
    X_test = [item.title for item in testnews]

    model = NaiveBayesClassifier()
    model.fit(X_fit, y_fit)
    labels = model.predict(X_test)
    return template('classify_template', rows=zip(testnews, labels))
def classify_news():
    """Render the unlabeled news with predicted 'good' rows first and
    'never' rows last; rows predicted 'maybe' are not included."""
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    model = NaiveBayesClassifier()
    model.import_model('news_model.json')
    predictions = model.predict([row.title for row in rows])

    d = deque()
    for new, pred in zip(rows, predictions):
        if pred == 'good':
            d.appendleft(new)  # good rows go to the front
        elif pred == 'never':
            d.append(new)      # never rows go to the back
    return template('classify', rows=d)
def classify_news():
    """Predict labels for the unlabeled news and render them.

    Fix: the original appended the unlabeled titles onto the *training*
    title list and then read predictions by row index, so every row
    received the prediction made for an unrelated training title.

    NOTE(review): relies on a session ``s`` defined outside this function —
    confirm it exists at module level.
    """
    rows = s.query(News).filter(News.label != None).all()
    a = NaiveBayesClassifier()
    name = []
    lables = []
    for n in rows:
        name.append(n.title)
        lables.append(n.label)
    a.fit(name, lables)

    rows = s.query(News).filter(News.label == None).all()
    # Predict on the unlabeled titles only, in row order.
    test_titles = [n.title for n in rows]
    result = a.predict(test_titles)
    for num, n in enumerate(rows):
        n.label = result[num]
    return template('classify_template', rows=rows)
def classify_news():
    """Render the unlabeled news sorted by their predicted label.

    Fix: the original iterated the un-materialized Query object twice
    (once for predict, once for pairing), issuing two SELECTs whose row
    sets could differ; materialize with .all() once.

    NOTE(review): relies on a session ``s`` defined outside this function —
    confirm it exists at module level.
    """
    train_news = s.query(News).filter(News.label != None).all()
    X_train = [new.title for new in train_news]
    y_train = [new.label for new in train_news]
    clf = NaiveBayesClassifier()
    clf.fit(X_train, y_train)

    classified_news = s.query(News).filter(News.label == None).all()
    labels = clf.predict([new.title for new in classified_news])

    # Sort (label, row) pairs by the label only — News rows themselves are
    # not comparable.
    pairs = sorted(zip(labels, classified_news), key=lambda x: x[0])
    rows = [pair[1] for pair in pairs]
    labels = [pair[0] for pair in pairs]
    return template('news_template', rows=rows, labels=labels)
def classify_news():
    """Predict labels for the unlabeled news and render (news, label)
    pairs.

    Fix: the original applied ``clean().lower()`` to the *labels*
    (``y_train`` — the 'good'/'maybe'/'never' strings) while fitting on raw
    titles, and then predicted on cleaned titles — the preprocessing was
    inverted on both sides. Clean the titles, leave the labels alone.
    """
    s = session()
    rows_test = s.query(News).filter(News.label == None).all()
    rows_train = s.query(News).filter(News.label != None).all()

    X_train = [clean(el.title).lower() for el in rows_train]
    y_train = [el.label for el in rows_train]
    X_test = [clean(el.title).lower() for el in rows_test]

    model = NaiveBayesClassifier()
    model.fit(X_train, y_train)
    metki = model.predict(X_test)
    return template('b_template', rows=zip(rows_test, metki))
def recommendations():
    """Train on a split of the labeled news and render ranked predictions
    for the unlabeled rows.

    Fix: the original bound the labeled rows to the name ``list``,
    shadowing the builtin.
    """
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    X, y = labeled, [news.label for news in labeled]
    X_train, y_train, X_test, y_test = train_test_split(
        X, y, param.SEED, train_size=param.TRAIN_SIZE)
    classifier = NaiveBayesClassifier(alpha=param.ALPHA)
    classifier.fit(X_train, y_train)

    data = []
    unlabeled = s.query(News).filter(News.label == None).all()
    # Each prediction record is unpacked as (item, (score, label)).
    # TODO(review): confirm predict()'s record layout against the classifier.
    for record in classifier.predict(unlabeled):
        data.append((record[0], int(record[1][0]), record[1][1]))
    classified_news = sorted(data, key=lambda x: (x[1], x[2]), reverse=True)
    return template('news_recommendations', rows=classified_news)
def classify_news():
    """Predict labels for the unlabeled news, attach them to the rows, and
    render the rows sorted by label."""
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()

    model = NaiveBayesClassifier()
    model.fit([news.title for news in training_rows],
              [news.label for news in training_rows])

    predicted_labels = model.predict([news.title for news in rows])
    for news, label in zip(rows, predicted_labels):
        news.label = label
    # Alphabetical label order happens to rank good < maybe < never.
    classified_news = sorted(rows, key=lambda news: news.label)
    return template('predicted.tpl', rows=classified_news)
def classify_news():
    """Render the unlabeled news grouped into good / maybe / never
    buckets according to the classifier's per-row prediction."""
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()
    classifier = NaiveBayesClassifier(0.05)
    classifier.fit([clean(news.title) for news in labeled_news],
                   [news.label for news in labeled_news])

    rows = s.query(News).filter(News.label == None).all()
    good, maybe, never = [], [], []
    for row in rows:
        prediction = classifier.predict(clean(row.title))
        # Anything that is neither 'good' nor 'maybe' lands in never.
        if prediction == "good":
            bucket = good
        elif prediction == "maybe":
            bucket = maybe
        else:
            bucket = never
        bucket.append(row)
    return template("news_recommendations",
                    good=good,
                    maybe=maybe,
                    never=never)
def recommendations():
    """Rank the unlabeled news good → maybe → never with a model trained
    on the labeled rows and render the ranked list."""
    s = session()

    # 1. Unlabeled rows to rank, labeled rows to train on.
    unmarked_rows = s.query(News).filter(News.label == None).all()
    marked_rows = s.query(News).filter(News.label != None).all()

    model = NaiveBayesClassifier()
    model.fit([row.title for row in marked_rows],
              [row.label for row in marked_rows])

    # 2. Score each unlabeled row: good=0, maybe=1, never=2.
    classified_news = list()
    for row in unmarked_rows:
        score = model.predict(row.title)
        if score == "good":
            score = 0
        elif score == "maybe":
            score = 1
        elif score == "never":
            score = 2
        classified_news.append([score, row])

    # 3. Sort by score, then strip the score column before rendering.
    print("Before sort")
    print(classified_news[:10])
    classified_news.sort(key=lambda x: x[0])
    print("After sort")
    print(classified_news[:10])
    classified_news = [result[1] for result in classified_news]
    return template('news_template', rows=classified_news)
def main():
    """Train the Naive Bayes demo on the play dataset and print the
    prediction plus per-feature probabilities for a fixed test case."""
    curr_dir = os.path.dirname(__file__)
    csv_file = os.path.join(curr_dir, 'data/play.csv')
    test_case = {
        'Tempo': 'Chuva',
        'Temperatura': 'Quente',
        'Humidade': 'Normal',
        'Vento': 'Forte'
    }
    df = pd.read_csv(csv_file, index_col='Dia')
    # Every column except the target 'Jogar' is a feature.
    features = df.loc[:, df.columns != 'Jogar']
    target = df['Jogar']
    clf = NaiveBayesClassifier()
    clf.fit(features, target)
    verdict = 'Jogar' if clf.predict(test_case) else 'Não Jogar'
    print('resultado: ', verdict)
    print()
    print(clf.get_probs_str(test_case))
def classify_news():
    """Label the unlabeled news with a Naive Bayes model and render them
    in reverse label order."""
    bs = NaiveBayesClassifier(1)
    s = session()

    nolable = s.query(News).filter(News.label == None).all()
    X = processing(nolable)

    train_rows = s.query(News).filter(News.label != None).all()
    y = [item.label for item in train_rows]
    bs.fit(processing(train_rows), y)

    predictions = bs.predict(X)
    for counter, item in enumerate(nolable):
        item.label = predictions[counter]

    # Sort ascending by label, then reverse the whole list.
    nolable.sort(key=lambda x: x.label)
    nolable.reverse()
    return template('news_template', rows=nolable)
def classify_news():
    """Predict labels for the unlabeled news, attach them to the rows, and
    render the rows sorted by label."""
    s = Session()
    rows = s.query(News).filter(News.label == None).all()
    learn_news = s.query(News).filter(News.label != None).all()

    # Fit on cleaned, lower-cased titles of the labeled rows.
    X = [clean(news.title).lower() for news in learn_news]
    y = [news.label for news in learn_news]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X, y)

    none_news = [news.title for news in rows]
    predict_labels = model.predict(none_news)
    for news, label in zip(rows, predict_labels):
        news.label = label
    classified_news = sorted(rows, key=lambda news: news.label)
    return template('./classify.tpl', rows=classified_news)
def classify_news():
    """Label the unlabeled posts with a pickled classifier and render them
    sorted by label.

    Fixes over the original: ``pickle.load(open(...))`` leaked the file
    handle (now closed via ``with``); the dead ``NaiveBayesClassifier()``
    instance and the commented-out retraining code (which also fetched and
    processed the labeled posts for nothing) were removed.

    NOTE(review): unpickling executes arbitrary code — only load pickle
    files this application produced itself.
    """
    s = session()
    with open('data.pickle', 'rb') as f:
        clf = pickle.load(f)

    rows = s.query(Posts).filter(Posts.label == None).all()
    X_test, _ = process(rows)
    predictions = clf.predict(X_test)
    for counter, row in enumerate(rows):
        row.label = predictions[counter]
    rows.sort(key=lambda x: x.label)
    return template('news_template', rows=rows)