def main():
    """Load a trained Seq2SeqSummarizer and print generated vs. original
    headlines for 20 randomly chosen articles, plus one ad-hoc sample text.
    """
    np.random.seed(42)  # reproducible article sampling
    data_dir_path = './data'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")
    X = df['text']  # article bodies (summarizer input)
    Y = df.title    # reference headlines

    # The config file holds a pickled dict (saved via np.save), so numpy
    # >= 1.16.3 requires allow_pickle=True to load it; other blocks in this
    # file (e.g. neural_summarize) already pass it.
    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(
            model_dir_path=model_dir_path),
        allow_pickle=True).item()

    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    # sample 20 random article indices without replacement
    for i in np.random.permutation(np.arange(len(X)))[0:20]:
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)

    random_text = 'i went to the store and i bought a bag of apples. i saw my friend there and we had a good chat about what is going on in the to stock market'
    print(summarizer.summarize(random_text))
# ----- Example 2 -----
def main():
    """Load a trained Seq2SeqSummarizer and print generated vs. original
    headlines for 20 randomly chosen news articles.
    """
    np.random.seed(42)  # reproducible article sampling
    data_dir_path = './data'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")
    X = df['text']  # article bodies (summarizer input)
    Y = df.title    # reference headlines

    # The config file holds a pickled dict (saved via np.save), so numpy
    # >= 1.16.3 requires allow_pickle=True to load it.
    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(
            model_dir_path=model_dir_path),
        allow_pickle=True).item()

    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    # sample 20 random article indices without replacement
    for i in np.random.permutation(np.arange(len(X)))[0:20]:
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)
def train():
    """Train a Seq2SeqSummarizer on (abstract, text) article pairs.

    Articles are either fetched via get_articles(year=2017) and checkpointed
    to dfArticles2017.pkl every 10 items, or restored from that pickle,
    depending on LOAD_DFARTICLES. A training-history plot is written under
    the reports directory.
    """
    LOAD_EXISTING_WEIGHTS = False  # resume from previously saved weights
    LOAD_DFARTICLES = True         # restore the pickled article DataFrame

    np.random.seed(42)
    report_dir_path = 'reports'
    model_dir_path = 'models'

    print('loading training data')
    if not LOAD_DFARTICLES:
        df = pd.DataFrame(columns=['abstract', 'text'])
        i = 0
        for article in get_articles(year=2017):
            print(i)
            tempDF = pd.DataFrame({
                'abstract': [article['description']],
                'text': [article['fullText']]
            })
            # DataFrame.append was deprecated in pandas 1.4 and removed in
            # 2.0; pd.concat is the supported way to grow the frame.
            df = pd.concat([df, tempDF], ignore_index=True)
            if i % 10 == 0:
                # checkpoint so a crash does not lose already-fetched articles
                with open('dfArticles2017.pkl', 'wb') as f:
                    print("dumpin time")
                    pickle.dump([df, i], f)
            # if i >= 100:
            #     break
            i += 1
    else:
        # context manager guarantees the pickle file handle is closed
        # (the original left it open)
        with open("dfArticles2017.pkl", "rb") as pickle_in:
            df, i = pickle.load(pickle_in)

    print('extract configuration from input texts ...')
    Y = df.abstract   # target summaries
    X = df['text']    # source documents

    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=42)

    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    # '/' works on every platform; the original used '\\', which only forms
    # valid paths on Windows and disagreed with the rest of this file
    history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history.png'
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history-v' + str(
            summarizer.version) + '.png'
    plot_and_save_history(history,
                          summarizer.model_name,
                          history_plot_file_path,
                          metrics={'loss', 'acc'})
def main():
    """Fit a Seq2SeqSummarizer on the fake-or-real-news corpus and persist a
    plot of the training history under the reports directory.
    """
    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    news_df = pd.read_csv(f"{data_dir_path}/fake_or_real_news.csv")

    Y = news_df.title    # reference headlines
    X = news_df['text']  # article bodies

    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    if LOAD_EXISTING_WEIGHTS:
        # resume training from previously saved weights
        weight_path = Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path)
        summarizer.load_weights(weight_file_path=weight_path)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    if LOAD_EXISTING_WEIGHTS:
        plot_name = f"{Seq2SeqSummarizer.model_name}-history-v{summarizer.version}.png"
    else:
        plot_name = f"{Seq2SeqSummarizer.model_name}-history.png"
    history_plot_file_path = f"{report_dir_path}/{plot_name}"
    plot_and_save_history(history,
                          summarizer.model_name,
                          history_plot_file_path,
                          metrics={'loss', 'acc'})
def main():
    """Evaluate a trained summarizer on the clickbait dataset, printing the
    original vs. generated headline and a sentence-level BLEU score per row.
    """
    np.random.seed(42)
    data_dir_path = 'demo/data'
    model_dir_path = 'demo/models'

    print('loading csv file ...')
    # df = pd.read_csv(data_dir_path + "/fnon-clickbait.csv")
    df = pd.read_csv(data_dir_path + "/clickbait.csv", sep="|")

    X = df.text   # article bodies (summarizer input)
    Y = df.title  # reference headlines

    # The config file holds a pickled dict, so numpy >= 1.16.3 requires
    # allow_pickle=True to load it.
    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(
            model_dir_path=model_dir_path),
        allow_pickle=True).item()

    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in range(len(X)):
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print(i)
        print('Original Headline: ', actual_headline)
        print('Generated Headline: ', headline)
        # sentence_bleu expects tokenized references and hypothesis
        bleu_score = sentence_bleu([actual_headline.split()], headline.split())
        print('BLEU score:', bleu_score)  # fixed typo: was 'BLUE score'
        # print('Actual Text:',x)
        print("-------------------------------------")
def neural_summarize(doc):
    """Summarize *doc* with the pre-trained Seq2SeqSummarizer stored under
    'models' and return the generated headline string."""
    np.random.seed(42)
    model_dir_path = 'models'  # refers to the demo/models folder

    config_path = Seq2SeqSummarizer.get_config_file_path(
        model_dir_path=model_dir_path)
    # the config file stores a pickled dict, hence allow_pickle=True
    config = np.load(config_path, allow_pickle=True).item()

    summarizer = Seq2SeqSummarizer(config)
    weight_path = Seq2SeqSummarizer.get_weight_file_path(
        model_dir_path=model_dir_path)
    summarizer.load_weights(weight_file_path=weight_path)

    return summarizer.summarize(doc)
def test():
    """Restore (or fetch) the 2018 validation articles and print generated
    vs. original abstracts for the first 20 of them.
    """
    LOAD_DFARTICLES = True  # restore the pickled article DataFrame
    np.random.seed(42)
    model_dir_path = 'models'  # refers to the demo/models folder

    print('loading validation data')
    if not LOAD_DFARTICLES:
        df = pd.DataFrame(columns=['abstract', 'text'])
        i = 0
        for article in get_articles(year=2018):
            print(i)
            tempDF = pd.DataFrame({
                'abstract': [article['description']],
                'text': [article['preprocessed']]
            })
            # DataFrame.append was deprecated in pandas 1.4 and removed in
            # 2.0; pd.concat is the supported way to grow the frame.
            df = pd.concat([df, tempDF], ignore_index=True)
            if i % 10 == 0:
                # checkpoint so a crash does not lose already-fetched articles
                with open('dfArticles2018.pkl', 'wb') as f:
                    print("dumpin time")
                    pickle.dump([df, i], f)
            if i >= 100:
                break
            i += 1
    else:
        # context manager guarantees the pickle file handle is closed
        # (the original left it open)
        with open("dfArticles2018.pkl", "rb") as pickle_in:
            df, i = pickle.load(pickle_in)
    Y = df.abstract   # reference summaries
    X = df['text']    # source documents

    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
        allow_pickle=True).item()

    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in range(20):
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)
# ----- Example 8 -----
def main():
    """Train a Seq2SeqSummarizer from scratch on the fake-or-real-news CSV,
    checkpointing the model into the demo models directory.
    """
    np.random.seed(42)
    data_dir_path = 'demo/data'
    report_dir_path = 'demo/reports'
    model_dir_path = 'demo/models'

    print('loading csv file ...')
    news_df = pd.read_csv(f"{data_dir_path}/fake_or_real_news.csv")

    print('extract configuration from input texts ...')
    Y = news_df.title    # reference headlines
    X = news_df['text']  # article bodies

    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    print('demo size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain,
                             Ytrain,
                             Xtest,
                             Ytest,
                             epochs=100,
                             model_dir_path=model_dir_path)
# ----- Example 9 -----
def main():
    """Train a Seq2SeqSummarizer on a Chinese news CSV and save a plot of
    the training history under the reports directory.
    """
    # Fixing the seed makes every run draw the same sequence of random
    # numbers, so the train/test shuffle below is reproducible; without it
    # the split would differ on every run.
    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/chinese_data.csv")

    print('extract configuration from input texts ...')
    Y = df.title    # reference headlines
    X = df['text']  # article bodies

    # fit_text (from keras_text_summarization's fake_news_loader) derives
    # vocabulary/length configuration from the raw input texts
    config = fit_text(X, Y)

    # hand the derived configuration to the seq2seq model
    summarizer = Seq2SeqSummarizer(config)

    if LOAD_EXISTING_WEIGHTS:
        weight_path = Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path)
        summarizer.load_weights(weight_file_path=weight_path)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    print('demo size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=10)

    plot_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history.png'
    if LOAD_EXISTING_WEIGHTS:
        plot_path = (report_dir_path + '/' + Seq2SeqSummarizer.model_name
                     + '-history-v' + str(summarizer.version) + '.png')
    plot_and_save_history(history,
                          summarizer.model_name,
                          plot_path,
                          metrics={'loss', 'acc'})
# ----- Example 10 -----
def main():
    """Train a Seq2SeqSummarizer on parallel en/de plain-text files (one
    sample per line) and save a plot of the training history.
    """
    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    print('loading csv file ...')
    #df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")

    print('extract configuration from input texts ...')
    # splitlines() — unlike split('\n') — does not yield a spurious empty
    # sample when the file ends with a trailing newline. Explicit utf-8
    # avoids locale-dependent decoding.
    with open(data_dir_path + '/train_preprocessed.en', encoding='utf-8') as f:
        X = f.read().splitlines()

    with open(data_dir_path + '/train_preprocessed.de', encoding='utf-8') as f:
        Y = f.read().splitlines()
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=42)

    print('demo size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history.png'
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history-v' + str(
            summarizer.version) + '.png'
    plot_and_save_history(history,
                          summarizer.model_name,
                          history_plot_file_path,
                          metrics={'loss', 'acc'})
def main():
    """Train a Seq2SeqSummarizer on the cleaned article CSV and write the
    training-history plot into the reports directory.
    """
    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    print('loading csv file ...')
    articles = pd.read_csv("dcr Man_Cleaned.csv")

    print('extract configuration from input texts ...')
    Y = articles.Title     # reference titles
    X = articles['Joined'] # joined article text
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    if LOAD_EXISTING_WEIGHTS:
        # resume from previously saved weights
        weight_path = Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path)
        summarizer.load_weights(weight_file_path=weight_path)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    print('demo size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    suffix = '-history.png'
    if LOAD_EXISTING_WEIGHTS:
        suffix = '-history-v' + str(summarizer.version) + '.png'
    history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + suffix
    plot_and_save_history(history,
                          summarizer.model_name,
                          history_plot_file_path,
                          metrics={'loss', 'acc'})
# ----- Example 12 -----
from flask import Flask, render_template, request
from keras_text_summarization.library.seq2seq import Seq2SeqSummarizer
import numpy as np

import tensorflow as tf
# TF1-style setup: capture the default graph at import time so request
# handler threads can run predictions against the same graph the model
# was loaded into (see graph.as_default() in the view below).
global graph, model  # NOTE(review): 'global' is a no-op at module scope; kept from original
graph = tf.get_default_graph()

app = Flask("blink")

model_dir_path = './models'

# The config file holds a pickled dict, so numpy >= 1.16.3 requires
# allow_pickle=True to load it (without it np.load raises ValueError).
config = np.load(
    Seq2SeqSummarizer.get_config_file_path(
        model_dir_path=model_dir_path),
    allow_pickle=True).item()
summarizer = Seq2SeqSummarizer(config)
summarizer.load_weights(
    weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
        model_dir_path=model_dir_path))


@app.route("/", methods=['POST', 'GET'])
def home():

    if request.method == 'POST':
        result = request.form
        with graph.as_default():
            headline = summarizer.summarize(result["content"])

        return render_template("index.html", result={'result': headline})