示例#1
0
文件: hackernews.py 项目: vk59/cs102
def recomendations():
    """Score a classifier on labelled news and label the unlabelled ones.

    Labelled ``News`` rows are split 70/30 to fit and score one
    ``NaiveBayesClassifier``; a second one (``alpha=0.01``) is fitted on all
    labelled rows and predicts a label for every row whose label is NULL.

    Returns:
        The result of ``template('news_recomendations', ...)`` called with
        the predicted labels as ``rows`` and the hold-out ``score``.
    """
    s = session()

    # Train only on rows that carry a label.  The previous id-by-id fetch
    # (``get(1) .. get(count)``) crashed on gaps in the id sequence, fed
    # unlabelled rows (label None) into the fit and skipped the first record.
    labelled = s.query(News).filter(News.label != None).all()
    X = [clean(row.title).lower() for row in labelled]
    y = [row.label for row in labelled]

    # Hold-out evaluation: first 70% to fit, remaining 30% to score.
    part = len(X) * 7 // 10
    model = NaiveBayesClassifier()
    model.fit(X[:part], y[:part])
    score = model.score(X[part:], y[part:])

    # Final model uses every labelled title.
    rec = NaiveBayesClassifier(alpha=0.01)
    rec.fit(X, y)

    rows = s.query(News).filter(News.label == None).all()
    # Normalise the titles exactly like the training data so that the tokens
    # seen at prediction time match the fitted vocabulary.
    X_new = [clean(row.title).lower() for row in rows]
    # NOTE(review): the template receives the predicted labels only, not the
    # News rows themselves - confirm this is what the template expects.
    classified_news = rec.predict(X_new)
    return template('news_recomendations', rows=classified_news, score=score)
def classify_news():
    """Predict labels for unlabelled news and list them good -> maybe -> never.

    A ``NaiveBayesClassifier(alpha=1)`` is fitted on the titles and labels of
    all labelled ``News`` rows and then applied to the titles of the rows
    whose label is NULL.

    Returns:
        The result of ``template('classification_template', rows=...)`` where
        ``rows`` holds the unlabelled ``News`` objects grouped by predicted
        label in the order 'good', 'maybe', 'never'.

    Raises:
        KeyError: if the classifier predicts a label other than those three.
    """
    s = session()
    marked_news = s.query(News).filter(News.label != None).all()
    X_train = [item.title for item in marked_news]
    y_train = [item.label for item in marked_news]

    model = NaiveBayesClassifier(alpha=1)
    model.fit(X_train, y_train)

    news = s.query(News).filter(News.label == None).all()
    predicts = model.predict([item.title for item in news])

    # Bucket the already-loaded rows directly instead of remembering their
    # ids and issuing one extra query per row afterwards.
    classified_news = {'good': [], 'maybe': [], 'never': []}
    for item, predict in zip(news, predicts):
        classified_news[predict].append(item)

    rows = []
    for label in ['good', 'maybe', 'never']:
        rows.extend(classified_news[label])
    return template('classification_template', rows=rows)
示例#3
0
def classify_news():
    """Split unlabelled news into good / maybe / never by predicted label.

    Fits a ``NaiveBayesClassifier`` on the titles and labels of the labelled
    ``News`` rows (only the ``title`` and ``label`` columns are loaded) and
    predicts a label for the title of every row whose label is NULL.

    Returns:
        The result of ``template('recommendations_template', {...})`` with
        the keys 'good', 'never' and 'maybe' mapping to lists of rows.
    """
    s = session()
    classifier = NaiveBayesClassifier()

    labelled_query = s.query(News).filter(News.label != None)
    train_news = labelled_query.options(load_only("title", "label")).all()
    x_train, y_train = [], []
    for item in train_news:
        x_train.append(item.title)
        y_train.append(item.label)
    classifier.fit(x_train, y_train)

    test_news = s.query(News).filter(News.label == None).all()
    labels = classifier.predict([item.title for item in test_news])

    # One pass over the rows; predictions are looked up by position.
    good, maybe, never = [], [], []
    for position, item in enumerate(test_news):
        label = labels[position]
        if label == 'good':
            good.append(item)
        elif label == 'maybe':
            maybe.append(item)
        elif label == 'never':
            never.append(item)

    context = {
        'good': good,
        'never': never,
        'maybe': maybe
    }
    return template('recommendations_template', context)
示例#4
0
def recommendations():
    """Fit a classifier on labelled news and pair each unlabelled row with its prediction.

    Returns:
        The result of ``template('news_ranked', rows=...)`` where ``rows`` is
        a list of ``(prediction, row)`` tuples, one per unlabelled ``News``
        row.
    """
    # Register '' as a template location only once; inserting it on every
    # call made TEMPLATE_PATH grow without bound.
    if '' not in TEMPLATE_PATH:
        TEMPLATE_PATH.insert(0, '')
    s = session()

    # 1. Fit the classifier on labeled news
    rows = s.query(News).filter(News.label != None).all()
    X = [clean(row.title).lower() for row in rows]
    y = [row.label for row in rows]

    model = NaiveBayesClassifier()
    model.fit(X, y)

    # 2. Get unlabeled news
    new_rows = s.query(News).filter(News.label == None).all()

    # 3. Get predictions
    # NOTE(review): predict() is given the raw title split into words, while
    # the model was fitted on whole, cleaned, lower-cased titles - confirm
    # this is intended before changing it (the template depends on the shape).
    marked = [(model.predict(row.title.split()), row) for row in new_rows]

    # 4. Print ranked table
    return template('news_ranked', rows=marked)
示例#5
0
def classify_news():
    """Sort unlabelled news into good / maybe / never lists.

    The classifier is fitted on labelled ``News`` rows with ``id < 1001``;
    titles are passed through ``clean`` and ``del_stops`` first.  Each
    unlabelled row has its ``title`` attribute overwritten with the
    preprocessed text before being classified one at a time; every
    prediction is printed.

    Returns:
        The result of ``template('news_recs', good=..., maybe=..., never=...)``.
    """
    s = session()
    training_query = s.query(News).filter(News.label != None)
    labeled_news = training_query.filter(News.id < 1001).all()

    x, y = [], []
    for item in labeled_news:
        x.append(del_stops(clean(item.title)))
        y.append(item.label)

    classifier = NaiveBayesClassifier(1)
    classifier.fit(x, y)

    good, maybe, never = [], [], []
    for row in s.query(News).filter(News.label == None).all():
        # The row itself carries the preprocessed title from here on.
        row.title = clean(row.title)
        row.title = del_stops(row.title)

        prediction = classifier.predict([row.title])
        print(prediction)

        # Anything that is neither 'good' nor 'maybe' lands in `never`.
        if prediction == ['good']:
            bucket = good
        elif prediction == ['maybe']:
            bucket = maybe
        else:
            bucket = never
        bucket.append(row)

    return template('news_recs', good=good, maybe=maybe, never=never)
示例#6
0
文件: main.py 项目: JJ97/fake-news
def main():
    """Build the vectorisers and classifiers, train where needed, and evaluate.

    Reads the three data splits via ``read_data()``, prints their sizes,
    constructs a tf-idf backed Naive Bayes classifier plus two
    ``DeepClassifier`` instances sharing one Word2Vec vectoriser, then trains
    (unless ``USE_CACHE`` is set and the classifier is not Naive Bayes) and
    evaluates each of them on the test split.
    """
    train_set, validation_set, test_set = read_data()

    for banner, dataset in (
        ("  TRAINING SET SIZE: {}", train_set),
        ("VALIDATION SET SIZE: {}", validation_set),
        ("   TESTING SET SIZE: {}\n", test_set),
    ):
        print(banner.format(len(dataset)))

    print("FITTING tf-idf vectoriser")
    tfidf = TfIdfVectoriser(train_set,
                            use_idf=False,
                            df_range=(0.1, 1.0),
                            ngram_range=(1, 2))

    print("LOADING Word2Vec vectoriser with max sequence length {}".format(
        MAX_SEQUENCE_LENGTH))
    w2v_slim = Word2VecVectoriser(MAX_SEQUENCE_LENGTH)

    print("INITIALISING CLASSIFIERS")
    dropout = 0.2
    learning_rate = 0.005
    batch_size = 32
    layer_size = 8
    # The second positional argument is the only difference between the two
    # deep models.
    lstm = DeepClassifier(w2v_slim, True, layer_size, batch_size,
                          learning_rate, dropout, USE_CACHE)
    rnn = DeepClassifier(w2v_slim, False, layer_size, batch_size,
                         learning_rate, dropout, USE_CACHE)
    bayes = NaiveBayesClassifier(tfidf)

    print("TESTING CLASSIFIERS")
    for classifier in (bayes, lstm, rnn):
        # Naive bayes is not cached so must always be trained
        needs_training = (isinstance(classifier, NaiveBayesClassifier)
                          or not USE_CACHE)
        if needs_training:
            print('   TRAINING {}'.format(classifier), flush=True)
            classifier.train(train_set, validation_set=validation_set)
        print('   TESTING {}'.format(classifier), flush=True)
        evaluate(classifier, test_set)
示例#7
0
def classify_news():
    """Label all unlabelled news with a pickled model and persist the labels.

    Titles of the rows whose label is NULL are passed through
    ``stemmer.clear`` and classified by the model stored in
    ``../model/model.pickle`` (relative to this file).  The predicted labels
    are written to the rows and committed.

    Returns:
        The result of ``template("classified_template.tpl", rows=...)`` with
        every labelled row ordered by label.

    Raises:
        ValueError: if the pickled model file does not exist.
    """
    sess = session()
    pending = sess.query(News).filter(News.label == None).all()
    x1 = [stemmer.clear(row.title) for row in pending]

    # Build the model path once instead of repeating the expression.
    model_path = pathlib.Path(
        f"{os.path.dirname(os.path.realpath(__file__))}/../model/model.pickle"
    )
    if not model_path.is_file():
        raise ValueError(
            "Classifier is untrained! Please mark enough news to adequately train the model and run bayes.py to save it."
        )
    # NOTE: unpickling executes code embedded in the file - only load a model
    # file that this project wrote itself.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)

    labels = model.predict(x1)
    # Update the rows that are already loaded and commit once, rather than
    # re-querying and committing for every single row.
    for row, label in zip(pending, labels):
        row.label = label
    sess.commit()

    rows = sess.query(News).filter(News.label != None).order_by(
        News.label).all()

    return template("classified_template.tpl", rows=rows)
示例#8
0
def classify_news():
    """Predict labels for unlabelled news and show them sorted by label.

    A ``NaiveBayesClassifier(alpha=1)`` is fitted on cleaned, lower-cased
    titles of the labelled ``News`` rows.  The predicted label is assigned to
    each unlabelled row object (no commit is issued here) and the rows are
    sorted by that label.

    Returns:
        The result of ``template(<news_recommendations.tpl>, rows=...)``.
    """
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()
    print('Received info from database')

    # Fit the classifier
    X = [clean(news.title).lower() for news in training_rows]
    y = [news.label for news in training_rows]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X, y)
    print('Fitted the classifier')

    # Normalise the titles the same way as the training data; raw titles
    # would not match the tokens the model was fitted on.
    unclassified_news = [clean(news.title).lower() for news in rows]
    predicted_labels = model.predict(unclassified_news)
    print('labels predicted')

    for news, label in zip(rows, predicted_labels):
        news.label = label
    classified_news = sorted(rows, key=lambda news: news.label)
    print('news sorted')
    # Raw string: same path as before, without the invalid "\c" / "\h"
    # escape sequences that newer Python versions warn about.
    return template(r'C:\cs102\homework06\news_recommendations.tpl',
                    rows=classified_news)
示例#9
0
def classify_news():
    """Rank unlabelled news by the classifier's prediction.

    Fits ``NaiveBayesClassifier(alpha=0.05)`` on the cleaned titles and
    labels of the labelled rows and predicts on the cleaned titles of the
    unlabelled ones.  Element ``[1]`` of each prediction decides the rank
    stored in ``row.result``: 0 for "good", 1 for "maybe", 2 otherwise.
    Rows are sorted by that rank and each title and rank is printed.

    Returns:
        The result of ``template('news_template', rows=...)``.
    """
    s = session()
    rows_teach = s.query(News).filter(News.label != None).all()
    rows_test = s.query(News).filter(News.label == None).all()

    train_titles, train_labels = [], []
    for el in rows_teach:
        train_titles.append(clean(el.title))
        train_labels.append(el.label)
    X = [clean(el.title) for el in rows_test]

    model = NaiveBayesClassifier(alpha=0.05)
    model.fit(train_titles, train_labels)
    result = model.predict(X)

    for position, row in enumerate(rows_test):
        verdict = result[position][1]
        if verdict == "good":
            row.result = 0
        elif verdict == "maybe":
            row.result = 1
        else:
            row.result = 2

    rows_test.sort(key=lambda entry: entry.result)

    for row in rows_test:
        print(row.title)
        print(row.result)

    return template('news_template', rows=rows_test)
示例#10
0
def recommendations():
    """Label the unlabelled news, commit the labels and show the 'good' ones.

    Steps:
      1. Fetch the unlabelled news from the database.
      2. Get a prediction for every item.
      3. Render the table of news.

    Uses the module-level session ``s``.  The predicted labels are written to
    the rows and committed, after which all rows labelled 'good' are queried.

    Returns:
        The result of ``template('recommendations_template', rows=...)``.
    """
    unclassified_news = s.query(News).filter(News.label.is_(None)).all()
    # Separate list objects on purpose: a chained ``a = b = []`` assignment
    # would make both names refer to the same list.
    title_unclassified = [news.title for news in unclassified_news]

    classified_news = s.query(News).filter(News.label.isnot(None)).all()
    title_classified = []
    label_list = []
    for news in classified_news:
        title_classified.append(news.title)
        label_list.append(news.label)

    data = NaiveBayesClassifier(alpha=1)
    data.fit(title_classified, label_list)

    label_classified = data.predict(title_unclassified)
    for position, news in enumerate(unclassified_news):
        news.label = label_classified[position]
    s.commit()

    classified_news = s.query(News).filter(News.label == 'good').all()
    return template('recommendations_template', rows=classified_news)
示例#11
0
def classify_news():
    """Show the unlabelled news that the classifier predicts as 'good'.

    Steps:
      1. Fetch the unlabelled news from the database.
      2. Get a prediction for every item.
      3. Render the table of news.

    Both training and prediction titles go through ``clean(...).lower()``.
    Every unlabelled row object gets its predicted label assigned; no commit
    is issued here.

    Returns:
        The result of ``template('news_recommendations', rows=...)`` with the
        rows predicted as 'good'.
    """
    s = session()

    labeled = s.query(News).filter(News.label != None).all()
    X = [clean(item.title).lower() for item in labeled]
    y = [item.label for item in labeled]

    model = NaiveBayesClassifier()
    model.fit(X, y)

    no_label = s.query(News).filter(News.label == None).all()
    X_p = [clean(item.title).lower() for item in no_label]
    y_predict = model.predict(X_p)

    classified_news = []
    for position, item in enumerate(no_label):
        predicted = y_predict[position]
        item.label = predicted
        if predicted == 'good':
            classified_news.append(item)

    return template('news_recommendations', rows=classified_news)
示例#12
0
def classify_news():
    """Train on news with ``id < 1001`` and classify the remaining news.

    Titles are lower-cased and stripped of ``string.punctuation`` before
    being given to ``NaiveBayesClassifier(alpha=0.01)``.

    Returns:
        The result of ``template('homework06/news_recommendations', rows=...)``
        where each row is ``[predicted_label, original_title, info]`` sorted
        by predicted label; ``info`` is a new ``News`` object carrying the
        author, points, comments and url of the source row.
    """
    s = session()
    strip_punctuation = str.maketrans("", "", string.punctuation)

    # Two range queries replace the per-id lookups.  The old training loop
    # called ``Query.get()`` with a filter expression (``get`` expects a
    # primary-key value) and then tried to iterate over its result.
    train_rows = s.query(News).filter(News.id < 1001).order_by(News.id).all()
    X = [row.title.translate(strip_punctuation).lower() for row in train_rows]
    y = [row.label for row in train_rows]

    test_rows = s.query(News).filter(News.id >= 1001).order_by(News.id).all()
    X_test = [row.title for row in test_rows]
    info = [
        News(author=row.author,
             points=row.points,
             comments=row.comments,
             url=row.url)
        for row in test_rows
    ]
    X_cleared = [x.translate(strip_punctuation).lower() for x in X_test]

    model = NaiveBayesClassifier(alpha=0.01)
    model.fit(X, y)
    predicted_news = model.predict(X_cleared)

    # Pair every test item with its *predicted* label; the training labels
    # ``y`` belong to different rows and must not be used here.
    classified_news = [
        [label, title, details]
        for label, title, details in zip(predicted_news, X_test, info)
    ]
    classified_news.sort(key=lambda item: item[0])
    return template('homework06/news_recommendations', rows=classified_news)
示例#13
0
def create_model():
    """Fit a classifier on the labelled news and hand the fit result to a template.

    ``NaiveBayesClassifier(0.05).fit`` is called with the cleaned titles and
    the labels; its return value is unpacked into two items.

    Returns:
        The result of ``template("news_model", labels=..., model=...)``.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()

    x_train, y_train = [], []
    for news in labeled_news:
        x_train.append(clean(news.title))
        y_train.append(news.label)

    classifier = NaiveBayesClassifier(0.05)
    labels, model = classifier.fit(x_train, y_train)
    return template("news_model", labels=labels, model=model)
示例#14
0
def classify_news(train_titles, train_labels, test_titles):
    """Fit a fresh ``NaiveBayesClassifier`` and predict labels for new titles.

    Args:
        train_titles: titles passed to ``fit`` as training input.
        train_labels: labels passed to ``fit`` alongside ``train_titles``.
        test_titles: titles passed to ``predict``.

    Returns:
        Whatever ``NaiveBayesClassifier.predict(test_titles)`` returns.
    """
    classifier = NaiveBayesClassifier()
    classifier.fit(train_titles, train_labels)
    return classifier.predict(test_titles)
示例#15
0
def classify_news():
    """Pair every unlabelled news row with the colour of its predicted label.

    The model is fitted on cleaned, lower-cased titles of the labelled rows;
    ``predict`` is then called once per unlabelled row with the raw title and
    its result is used as a key into ``colors``.

    Returns:
        The result of ``template("color_template", rows=...)`` where ``rows``
        is a list of ``(row, colour)`` tuples.
    """
    s = get_session(engine)
    model = NaiveBayesClassifier()

    train_set = s.query(News).filter(News.label != None).all()
    train_titles = [clean(news.title).lower() for news in train_set]
    train_labels = [news.label for news in train_set]
    model.fit(train_titles, train_labels)

    test = s.query(News).filter(News.label == None).all()
    # All predictions are computed before any colour lookup happens.
    cell = [model.predict(news.title) for news in test]
    rows = [(news, colors[prediction]) for news, prediction in zip(test, cell)]
    return template("color_template", rows=rows)
示例#16
0
def classify_news():
    """Build a ``NaiveBayesClassifier`` from the labelled news.

    Returns:
        ``NaiveBayesClassifier(titles, labels)`` constructed from the titles
        and labels of every ``News`` row that has a label.
    """
    s = session()
    labelled = s.query(News).filter(News.label != None).all()

    titles, labels = [], []
    for item in labelled:
        titles.append(item.title)
        labels.append(item.label)

    return NaiveBayesClassifier(titles, labels)
示例#17
0
def classify_news():
    """Show the unlabelled news ordered by their predicted label.

    The model is fitted on cleaned, lower-cased titles of the labelled rows.
    The sort key of an unlabelled row is ``get_l`` applied to the model's
    prediction for its cleaned, lower-cased title.

    Returns:
        The result of ``template("news_template", rows=...)``.
    """
    s = session()
    model = NaiveBayesClassifier()

    train = s.query(News).filter(News.label != None).all()
    titles = [clean(news.title).lower() for news in train]
    labels = [news.label for news in train]
    model.fit(titles, labels)

    test = s.query(News).filter(News.label == None).all()

    def rank(news):
        # One predict() call per row, exactly when the sort asks for its key.
        return get_l(model.predict(clean(news.title).lower()))

    ordered = sorted(test, key=rank)
    return template("news_template", rows=ordered)
示例#18
0
def classify_news():
    """Pair every unlabelled news row with its predicted label.

    Returns:
        The result of ``template('classify_template', rows=...)`` where
        ``rows`` is a ``zip`` of the unlabelled rows and the predictions
        (a one-shot iterator).
    """
    s = session()
    fitnews = s.query(News).filter(News.label != None).all()
    testnews = s.query(News).filter(News.label == None).all()

    X_fit = [item.title for item in fitnews]
    y_fit = [item.label for item in fitnews]
    X_test = [item.title for item in testnews]

    model = NaiveBayesClassifier()
    model.fit(X_fit, y_fit)
    labels = model.predict(X_test)

    return template('classify_template', rows=zip(testnews, labels))
示例#19
0
def classify_news():
    """Show unlabelled news predicted 'good' first and 'never' last.

    The classifier state is loaded with ``import_model('news_model.json')``.
    Rows predicted 'good' end up at the front of the deque in reverse
    encounter order, rows predicted 'never' at the back in encounter order;
    rows with any other prediction are left out.

    Returns:
        The result of ``template('classify', rows=<deque>)``.
    """
    s = session()
    rows = s.query(News).filter(News.label == None).all()

    model = NaiveBayesClassifier()
    model.import_model('news_model.json')
    titles = [row.title for row in rows]
    predictions = model.predict(titles)

    liked, disliked = [], []
    for new, pred in zip(rows, predictions):
        if pred == 'good':
            liked.append(new)
        elif pred == 'never':
            disliked.append(new)

    # Reversing `liked` reproduces the order that repeated appendleft gives.
    d = deque(reversed(liked))
    d.extend(disliked)

    return template('classify', rows=d)
示例#20
0
def classify_news():
    """Assign a predicted label to every unlabelled news row.

    Uses the module-level session ``s``.  A ``NaiveBayesClassifier`` is
    fitted on the titles and labels of the labelled rows; the predicted label
    is assigned to each unlabelled row object (no commit is issued here).

    Returns:
        The result of ``template('classify_template', rows=...)`` with the
        unlabelled rows.
    """
    labelled = s.query(News).filter(News.label != None).all()
    name = [n.title for n in labelled]
    lables = [n.label for n in labelled]

    a = NaiveBayesClassifier()
    a.fit(name, lables)

    rows = s.query(News).filter(News.label == None).all()
    # Predict on the unlabelled titles only.  Previously they were appended
    # to the training title list, so the first predictions (which were then
    # assigned to these rows) actually belonged to the training titles.
    result = a.predict([n.title for n in rows])
    for n, label in zip(rows, result):
        n.label = label
    return template('classify_template', rows=rows)
示例#21
0
def classify_news():
    """Show the unlabelled news sorted by predicted label.

    Uses the module-level session ``s``.  A ``NaiveBayesClassifier`` is
    fitted on the titles and labels of the labelled rows and applied to the
    titles of the unlabelled ones.

    Returns:
        The result of ``template('news_template', rows=..., labels=...)``
        where ``rows`` and ``labels`` are parallel lists sorted by label.
    """
    train_news = s.query(News).filter(News.label != None).all()
    X_train = [new.title for new in train_news]
    y_train = [new.label for new in train_news]

    clf = NaiveBayesClassifier()
    clf.fit(X_train, y_train)

    # Materialise the query once.  Iterating the Query object twice ran it
    # twice, so predictions and rows could get out of step if the result
    # changed or came back in a different order.
    classified_news = s.query(News).filter(News.label == None).all()
    predicted = clf.predict([new.title for new in classified_news])

    # sort() is stable, so rows with the same label keep their query order.
    pairs = list(zip(predicted, classified_news))
    pairs.sort(key=lambda pair: pair[0])
    rows = [new for _, new in pairs]
    labels = [label for label, _ in pairs]

    return template('news_template', rows=rows, labels=labels)
示例#22
0
def classify_news():
    """Pair every unlabelled news row with its predicted label.

    Training and prediction titles are both normalised with
    ``clean(...).lower()`` before being given to ``NaiveBayesClassifier``.

    Returns:
        The result of ``template('b_template', rows=...)`` where ``rows`` is
        a ``zip`` of the unlabelled rows and their predictions.
    """
    s = session()
    rows_test = s.query(News).filter(News.label == None).all()
    rows_train = s.query(News).filter(News.label != None).all()

    # Normalise the training *titles*.  Previously the cleaning was applied
    # to the labels, so the model was fitted on raw titles but asked to
    # predict on cleaned ones.  Labels are now passed through unchanged.
    x_train = [clean(el.title).lower() for el in rows_train]
    y_train = [el.label for el in rows_train]
    x_test = [clean(el.title).lower() for el in rows_test]

    model = NaiveBayesClassifier()
    model.fit(x_train, y_train)
    metki = model.predict(x_test)

    sol = zip(rows_test, metki)
    return template('b_template', rows=sol)
示例#23
0
def recommendations():
    """Rank the unlabelled news using a classifier fitted on a training split.

    The labelled ``News`` rows (the row objects themselves, not their titles)
    and their labels are split with ``train_test_split`` using
    ``param.SEED`` and ``param.TRAIN_SIZE``; only the training part is used.
    Each prediction record is flattened to
    ``(record[0], int(record[1][0]), record[1][1])`` and the records are
    sorted in descending order by the last two fields.

    Returns:
        The result of ``template('news_recommendations', rows=...)``.
    """
    s = session()
    labeled_rows = s.query(News).filter(News.label != None).all()
    labels = [news.label for news in labeled_rows]
    X_train, y_train, _X_test, _y_test = train_test_split(
        labeled_rows, labels, param.SEED, train_size=param.TRAIN_SIZE)

    classifier = NaiveBayesClassifier(alpha=param.ALPHA)
    classifier.fit(X_train, y_train)

    unlabeled = s.query(News).filter(News.label == None).all()
    data = [
        (record[0], int(record[1][0]), record[1][1])
        for record in classifier.predict(unlabeled)
    ]

    classified_news = sorted(
        data, key=lambda entry: (entry[1], entry[2]), reverse=True)
    return template('news_recommendations', rows=classified_news)
示例#24
0
def classify_news():
    """Predict labels for unlabelled news and show them sorted by label.

    A ``NaiveBayesClassifier`` is fitted on the raw titles and labels of the
    labelled rows.  Each unlabelled row object gets its predicted label
    assigned (no commit is issued here).

    Returns:
        The result of ``template('predicted.tpl', rows=...)``.
    """
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()

    X = [item.title for item in training_rows]
    y = [item.label for item in training_rows]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    unclassified_news = [item.title for item in rows]
    predicted_labels = model.predict(unclassified_news)
    for item, label in zip(rows, predicted_labels):
        item.label = label

    classified_news = sorted(rows, key=lambda item: item.label)
    return template('predicted.tpl', rows=classified_news)
示例#25
0
def classify_news():
    """Sort unlabelled news into good / maybe / never lists.

    ``NaiveBayesClassifier(0.05)`` is fitted on the cleaned titles of the
    labelled rows; ``predict`` is called once per unlabelled row with its
    cleaned title.  A prediction other than "good" or "maybe" puts the row
    into ``never``.

    Returns:
        The result of
        ``template("news_recommendations", good=..., maybe=..., never=...)``.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()

    x_train, y_train = [], []
    for news in labeled_news:
        x_train.append(clean(news.title))
        y_train.append(news.label)

    classifier = NaiveBayesClassifier(0.05)
    classifier.fit(x_train, y_train)

    good, maybe, never = [], [], []
    for row in s.query(News).filter(News.label == None).all():
        prediction = classifier.predict(clean(row.title))
        if prediction == "good":
            bucket = good
        elif prediction == "maybe":
            bucket = maybe
        else:
            bucket = never
        bucket.append(row)

    return template("news_recommendations", good=good, maybe=maybe, never=never)
示例#26
0
def recommendations():
    """Show the unlabelled news ordered good -> maybe -> never.

    Steps:
      1. Fetch the unlabelled news from the database.
      2. Get a prediction for every item.
      3. Render the ranked table of news.

    ``predict`` is called once per unlabelled row with its raw title.  The
    predictions "good", "maybe" and "never" are mapped to 0, 1 and 2; any
    other prediction is kept as the sort value unchanged.  The first ten
    entries are printed before and after sorting.

    Returns:
        The result of ``template('news_template', rows=...)``.
    """
    s = session()

    unmarked_rows = s.query(News).filter(News.label == None).all()
    marked_rows = s.query(News).filter(News.label != None).all()

    X = [row.title for row in marked_rows]
    y = [row.label for row in marked_rows]

    model = NaiveBayesClassifier()
    model.fit(X, y)

    ranking = (("good", 0), ("maybe", 1), ("never", 2))
    classified_news = []
    for row in unmarked_rows:
        score = model.predict(row.title)
        for label_name, rank in ranking:
            if score == label_name:
                score = rank
                break
        classified_news.append([score, row])

    print("Before sort")
    print(classified_news[:10])
    classified_news.sort(key=lambda entry: entry[0])
    print("After sort")
    print(classified_news[:10])

    ordered_rows = [entry[1] for entry in classified_news]
    return template('news_template', rows=ordered_rows)
示例#27
0
def main():
    """Fit the classifier on ``data/play.csv`` and report on one test case.

    The CSV (next to this file, indexed by the 'Dia' column) supplies the
    features (every column except 'Jogar') and the target ('Jogar').  The
    prediction for the hard-coded test case is printed, followed by the
    string returned by ``clf.get_probs_str``.
    """
    csv_file = os.path.join(os.path.dirname(__file__), 'data/play.csv')

    test_case = {
        'Tempo': 'Chuva',
        'Temperatura': 'Quente',
        'Humidade': 'Normal',
        'Vento': 'Forte'
    }

    df = pd.read_csv(csv_file, index_col='Dia')
    feature_mask = df.columns != 'Jogar'
    X = df.loc[:, feature_mask]
    y = df['Jogar']

    clf = NaiveBayesClassifier()
    clf.fit(X, y)

    if clf.predict(test_case):
        verdict = 'Jogar'
    else:
        verdict = 'Não Jogar'
    print('resultado: ', verdict)
    print()
    print(clf.get_probs_str(test_case))
示例#28
0
def classify_news():
    """Predict labels for unlabelled news and show them in reverse label order.

    Both row sets go through ``processing`` before being given to
    ``NaiveBayesClassifier(1)``.  Each unlabelled row object gets its
    predicted label assigned (no commit is issued here); the rows are sorted
    by label and the list is then reversed.

    Returns:
        The result of ``template('news_template', rows=...)``.
    """
    bs = NaiveBayesClassifier(1)
    s = session()

    nolable = s.query(News).filter(News.label == None).all()
    X = processing(nolable)

    labelled = s.query(News).filter(News.label != None).all()
    y = [item.label for item in labelled]
    X_train = processing(labelled)

    bs.fit(X_train, y)
    predictions = bs.predict(X)

    for position, item in enumerate(nolable):
        item.label = predictions[position]

    # Sort ascending, then flip the whole list (kept as two steps so that
    # rows with equal labels end up in the same order as before).
    nolable.sort(key=lambda entry: entry.label)
    nolable.reverse()
    return template('news_template', rows=nolable)
示例#29
0
def classify_news():
    """Predict labels for unlabelled news and show them sorted by label.

    A ``NaiveBayesClassifier(alpha=1)`` is fitted on cleaned, lower-cased
    titles of the labelled rows.  Each unlabelled row object gets its
    predicted label assigned (no commit is issued here).

    Returns:
        The result of ``template('./classify.tpl', rows=...)``.
    """
    s = Session()
    rows = s.query(News).filter(News.label == None).all()
    learn_news = s.query(News).filter(News.label != None).all()

    X = [clean(news.title).lower() for news in learn_news]
    y = [news.label for news in learn_news]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X, y)

    # Normalise the titles the same way as the training data; raw titles
    # would not match the tokens the model was fitted on.
    none_news = [clean(news.title).lower() for news in rows]
    predict_labels = model.predict(none_news)
    for news, label in zip(rows, predict_labels):
        news.label = label

    classified_news = sorted(rows, key=lambda news: news.label)
    return template('./classify.tpl', rows=classified_news)
示例#30
0
def classify_news():
    """Label the unlabelled posts with a pickled classifier, sorted by label.

    The classifier is loaded from ``data.pickle`` in the current working
    directory.  Unlabelled ``Posts`` rows are converted with ``process`` and
    each row object gets its predicted label assigned (no commit is issued
    here).

    Returns:
        The result of ``template('news_template', rows=...)``.
    """
    s = session()

    # The model is expected to have been fitted on the labelled posts and
    # dumped to data.pickle beforehand; it is not retrained here.
    # NOTE: unpickling executes code embedded in the file - only load a
    # pickle that this project wrote itself.
    with open('data.pickle', 'rb') as model_file:  # closes the handle
        clf = pickle.load(model_file)

    rows = s.query(Posts).filter(Posts.label == None).all()
    X_test, _ = process(rows)
    predictions = clf.predict(X_test)
    for row, label in zip(rows, predictions):
        row.label = label

    rows.sort(key=lambda x: x.label)
    return template('news_template', rows=rows)