# Shared imports for the scripts below.
import pickle

import numpy as np
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from sklearn.model_selection import train_test_split

from keras_text_summarization.library.applications.fake_news_loader import fit_text
from keras_text_summarization.library.seq2seq import Seq2SeqSummarizer
from keras_text_summarization.library.utility.plot_utils import plot_and_save_history


def main():
    np.random.seed(42)
    data_dir_path = './data'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")
    X = df['text']
    Y = df.title

    # Restore the trained model from its saved configuration and weights.
    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
        allow_pickle=True).item()
    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    # Summarize 20 randomly chosen articles and compare against the originals.
    for i in np.random.permutation(np.arange(len(X)))[0:20]:
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)

    # Sanity check on out-of-domain input.
    random_text = ('i went to the store and i bought a bag of apples. '
                   'i saw my friend there and we had a good chat about '
                   'what is going on in the stock market')
    print(summarizer.summarize(random_text))
def train():
    LOAD_EXISTING_WEIGHTS = False
    LOAD_DFARTICLES = True

    np.random.seed(42)
    report_dir_path = 'reports'
    model_dir_path = 'models'

    print('loading training data')
    if not LOAD_DFARTICLES:
        # Build the training frame article by article and checkpoint it to
        # disk every 10 articles so a crashed crawl can be resumed.
        df = pd.DataFrame(columns=['abstract', 'text'])
        i = 0
        for article in get_articles(year=2017):
            print(i)
            tempDF = pd.DataFrame({
                'abstract': [article['description']],
                'text': [article['fullText']]
            })
            df = df.append(tempDF, ignore_index=True)
            if i % 10 == 0:
                with open('dfArticles2017.pkl', 'wb') as f:
                    print('checkpointing articles ...')
                    pickle.dump([df, i], f)
            # if i >= 100:
            #     break
            i += 1
    else:
        # Resume from the pickled checkpoint instead of re-crawling.
        with open('dfArticles2017.pkl', 'rb') as pickle_in:
            df, i = pickle.load(pickle_in)

    print('extract configuration from input texts ...')
    Y = df.abstract
    X = df['text']
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)
    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    history_plot_file_path = (report_dir_path + '/' +
                              Seq2SeqSummarizer.model_name + '-history.png')
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = (report_dir_path + '/' +
                                  Seq2SeqSummarizer.model_name + '-history-v' +
                                  str(summarizer.version) + '.png')
    plot_and_save_history(history, summarizer.model_name,
                          history_plot_file_path, metrics={'loss', 'acc'})
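# get_articles() above comes from the surrounding project and is not shown
# here. A hypothetical stub of the interface the training loop expects: an
# iterable of dicts carrying 'description' and 'fullText' keys (the JSON file
# name and record layout are assumptions, not part of the original code):
import json

def get_articles(year):
    # Yield one dict per crawled article for the given year.
    with open('articles_%d.json' % year) as f:
        for record in json.load(f):
            yield {'description': record['description'],
                   'fullText': record['fullText']}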
def main():
    LOAD_EXISTING_WEIGHTS = False

    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")
    Y = df.title
    X = df['text']
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)
    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    history_plot_file_path = (report_dir_path + '/' +
                              Seq2SeqSummarizer.model_name + '-history.png')
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = (report_dir_path + '/' +
                                  Seq2SeqSummarizer.model_name + '-history-v' +
                                  str(summarizer.version) + '.png')
    plot_and_save_history(history, summarizer.model_name,
                          history_plot_file_path, metrics={'loss', 'acc'})
def main():
    np.random.seed(42)
    data_dir_path = 'demo/data'
    model_dir_path = 'demo/models'

    print('loading csv file ...')
    # df = pd.read_csv(data_dir_path + "/fnon-clickbait.csv")
    df = pd.read_csv(data_dir_path + "/clickbait.csv", sep="|")
    X = df.text
    Y = df.title

    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
        allow_pickle=True).item()
    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in range(len(X)):
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print(i)
        print('Original Headline: ', actual_headline)
        print('Generated Headline: ', headline)
        # Score the generated headline against the reference headline.
        bleu_score = sentence_bleu([actual_headline.split()],
                                   headline.split())
        print('BLEU score:', bleu_score)
        # print('Actual Text:', x)
        print("-------------------------------------")
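# With nltk's default 4-gram weights, sentence_bleu scores very short
# headline pairs as (near) zero and warns whenever a higher-order n-gram has
# no overlap. A minimal sketch of one common workaround using nltk's
# SmoothingFunction; the helper name headline_bleu is an assumption, not part
# of the original script:
from nltk.translate.bleu_score import SmoothingFunction

def headline_bleu(reference, hypothesis):
    # method1 adds a small constant to zero n-gram counts, so short
    # hypotheses still receive a graded score instead of collapsing to 0.
    return sentence_bleu([reference.split()], hypothesis.split(),
                         smoothing_function=SmoothingFunction().method1)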
def neural_summarize(doc):
    np.random.seed(42)
    model_dir_path = 'models'  # refers to the demo/models folder

    # Rebuild the trained summarizer from its saved configuration and weights.
    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
        allow_pickle=True).item()
    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    headline = summarizer.summarize(doc)
    # print('Generated Headline: ', headline)
    return headline
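# neural_summarize() reloads the configuration and weights on every call,
# which is expensive if it is invoked per document. A minimal sketch of
# caching the model at module level instead; the _SUMMARIZER name and
# get_summarizer helper are assumptions, not part of the original code:
_SUMMARIZER = None

def get_summarizer(model_dir_path='models'):
    # Load the model once and reuse it on subsequent calls.
    global _SUMMARIZER
    if _SUMMARIZER is None:
        config = np.load(
            Seq2SeqSummarizer.get_config_file_path(
                model_dir_path=model_dir_path),
            allow_pickle=True).item()
        _SUMMARIZER = Seq2SeqSummarizer(config)
        _SUMMARIZER.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))
    return _SUMMARIZER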
def test():
    LOAD_DFARTICLES = True

    np.random.seed(42)
    model_dir_path = 'models'  # refers to the demo/models folder

    print('loading validation data')
    if not LOAD_DFARTICLES:
        # Build the validation frame article by article, checkpointing it to
        # disk every 10 articles and stopping after 100.
        df = pd.DataFrame(columns=['abstract', 'text'])
        i = 0
        for article in get_articles(year=2018):
            print(i)
            tempDF = pd.DataFrame({
                'abstract': [article['description']],
                'text': [article['preprocessed']]
            })
            df = df.append(tempDF, ignore_index=True)
            if i % 10 == 0:
                with open('dfArticles2018.pkl', 'wb') as f:
                    print('checkpointing articles ...')
                    pickle.dump([df, i], f)
            if i >= 100:
                break
            i += 1
    else:
        # Resume from the pickled checkpoint instead of re-crawling.
        with open('dfArticles2018.pkl', 'rb') as pickle_in:
            df, i = pickle.load(pickle_in)

    Y = df.abstract
    X = df['text']

    config = np.load(
        Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
        allow_pickle=True).item()
    summarizer = Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in range(20):
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)
        # print('Article: ', x)
        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)
def main():
    np.random.seed(42)
    data_dir_path = 'demo/data'
    report_dir_path = 'demo/reports'
    model_dir_path = 'demo/models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")

    print('extract configuration from input texts ...')
    Y = df.title
    X = df['text']
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100,
                             model_dir_path=model_dir_path)
def main():
    LOAD_EXISTING_WEIGHTS = False

    # seed() fixes the integer that initializes the random number generator:
    # with the same seed, the same "random" numbers are produced on every
    # run; without it, the generator is seeded from the current time and
    # results differ between runs.
    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/chinese_data.csv")

    print('extract configuration from input texts ...')
    Y = df.title
    X = df['text']
    # fit_text is imported from
    # keras_text_summarization.library.applications.fake_news_loader.
    config = fit_text(X, Y)

    # Pass the returned config into the Seq2SeqSummarizer defined in
    # seq2seq.py.
    summarizer = Seq2SeqSummarizer(config)
    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=10)

    history_plot_file_path = (report_dir_path + '/' +
                              Seq2SeqSummarizer.model_name + '-history.png')
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = (report_dir_path + '/' +
                                  Seq2SeqSummarizer.model_name + '-history-v' +
                                  str(summarizer.version) + '.png')
    plot_and_save_history(history, summarizer.model_name,
                          history_plot_file_path, metrics={'loss', 'acc'})
def main():
    LOAD_EXISTING_WEIGHTS = False

    np.random.seed(42)
    data_dir_path = './data'
    report_dir_path = './reports'
    model_dir_path = './models'

    # df = pd.read_csv(data_dir_path + "/fake_or_real_news.csv")
    print('extract configuration from input texts ...')
    # Load a parallel English/German corpus, one sentence per line;
    # splitlines() avoids a trailing empty entry when the file ends with a
    # newline.
    with open(data_dir_path + '/train_preprocessed.en') as f:
        X = f.read().splitlines()
    with open(data_dir_path + '/train_preprocessed.de') as f:
        Y = f.read().splitlines()
    config = fit_text(X, Y)

    summarizer = Seq2SeqSummarizer(config)
    if LOAD_EXISTING_WEIGHTS:
        summarizer.load_weights(
            weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
                model_dir_path=model_dir_path))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100)

    history_plot_file_path = (report_dir_path + '/' +
                              Seq2SeqSummarizer.model_name + '-history.png')
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = (report_dir_path + '/' +
                                  Seq2SeqSummarizer.model_name + '-history-v' +
                                  str(summarizer.version) + '.png')
    plot_and_save_history(history, summarizer.model_name,
                          history_plot_file_path, metrics={'loss', 'acc'})
def main(): np.random.seed(42) data_dir_path = './data' report_dir_path = './reports' model_dir_path = './models' print('loading csv file ...') df = pd.read_csv("dcr Man_Cleaned.csv") print('extract configuration from input texts ...') Y = df.Title X = df['Joined'] config = fit_text(X, Y) summarizer = Seq2SeqSummarizer(config) if LOAD_EXISTING_WEIGHTS: summarizer.load_weights( weight_file_path=Seq2SeqSummarizer.get_weight_file_path( model_dir_path=model_dir_path)) Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42) print('demo size: ', len(Xtrain)) print('testing size: ', len(Xtest)) print('start fitting ...') history = summarizer.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=100) history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history.png' if LOAD_EXISTING_WEIGHTS: history_plot_file_path = report_dir_path + '/' + Seq2SeqSummarizer.model_name + '-history-v' + str( summarizer.version) + '.png' plot_and_save_history(history, summarizer.model_name, history_plot_file_path, metrics={'loss', 'acc'})
from flask import Flask, render_template, request
from keras_text_summarization.library.seq2seq import Seq2SeqSummarizer
import numpy as np
import tensorflow as tf

# Capture the default graph at load time so the Keras model can be used from
# Flask's request-handler threads (TensorFlow 1.x API).
graph = tf.get_default_graph()

app = Flask("blink")

# Load the trained summarizer once, at application startup.
model_dir_path = './models'
config = np.load(
    Seq2SeqSummarizer.get_config_file_path(model_dir_path=model_dir_path),
    allow_pickle=True).item()
summarizer = Seq2SeqSummarizer(config)
summarizer.load_weights(
    weight_file_path=Seq2SeqSummarizer.get_weight_file_path(
        model_dir_path=model_dir_path))


@app.route("/", methods=['POST', 'GET'])
def home():
    if request.method == 'POST':
        result = request.form
        with graph.as_default():
            headline = summarizer.summarize(result["content"])
        return render_template("index.html", result={'result': headline})
    # On GET, render the form without a result so the handler always returns
    # a response.
    return render_template("index.html", result=None)
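# The module above has no entry point; it can be served with "flask run". A
# minimal sketch of a direct entry point instead (the host, port, and debug
# values are assumptions, not part of the original code):
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000, debug=False)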