This repository has been archived by the owner on Apr 17, 2023. It is now read-only.
/
app.py
89 lines (71 loc) · 2.69 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from __future__ import unicode_literals
import os
from flask import Flask, render_template, request, jsonify
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer as TextSummarizer
from sumy.summarizers.edmundson import EdmundsonSummarizer as ESummarizer
from sumy.summarizers.kl import KLSummarizer as KLSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer as LexSummarizer
from sumy.summarizers.lsa import LsaSummarizer as LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer as LuhnSummarizer
from sumy.summarizers.sum_basic import SumBasicSummarizer as SumBasicSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
# Add the local ./nltk_data/ directory to NLTK's data search path.
nltk.data.path.append('./nltk_data/')

# Language passed to the tokenizer, stemmer and stop-word lookup, and the
# number of sentences requested from the summarizer.
LANGUAGE = "english"
SENTENCES_COUNT = 4

# configuration
# The upper-case names in this module are loaded into ``app.config`` by the
# ``from_object`` call below.
DATABASE = '/tmp/flaskr.db'
# Debug mode enables Werkzeug's interactive debugger, which lets any client
# that can reach the server execute Python code. The entry point binds to
# 0.0.0.0 by default, so keep debug off unless FLASK_DEBUG asks for it.
DEBUG = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
# Deployments should provide their own SECRET_KEY; the literal fallback is
# only suitable for local development.
SECRET_KEY = os.environ.get('SECRET_KEY', 'development key')
USERNAME = 'admin'
PASSWORD = 'default'

# create our little application :)
app = Flask(__name__)
app.config.from_object(__name__)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the web page named in the JSON request body.

    Reads the ``url`` and ``summarizer`` keys from the posted JSON object
    and passes both to ``summarize_url``.

    Returns:
        The list produced by ``summarize_url`` serialized with ``jsonify``,
        or a JSON object with an ``error`` message and HTTP status 400 when
        the body is not a JSON object or ``url`` is not an http(s) URL.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the client gets an explicit 400 instead of a 500 caused
    # by calling .get() on None.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify(error='Request body must be a JSON object.'), 400

    url = payload.get('url')
    # The URL is fetched server-side on behalf of the caller, so only plain
    # web addresses are accepted.
    try:
        is_web_url = url.lower().startswith(('http://', 'https://'))
    except AttributeError:  # url is missing or not a string
        is_web_url = False
    if not is_web_url:
        return jsonify(error="'url' must be an http:// or https:// URL."), 400

    summary = summarize_url(url, payload.get('summarizer'))
    return jsonify(summary)
def summarize_url(url, summarizer):
    """Fetch the HTML page at *url* and return its summary sentences.

    Example url:
    "http://www.cnn.com/2016/06/12/politics/hillary-clinton-bernie-sanders-meeting-tuesday/index.html"

    Args:
        url: Address of the page to parse with ``HtmlParser.from_url``.
        summarizer: Key selecting the algorithm: 'luhn', 'edmundson',
            'lsa', 'lex', 'text' (TextRank) or 'sb' (SumBasic). Any other
            value, including None, selects the KL summarizer.

    Returns:
        A list with ``str()`` of each sentence the summarizer returns when
        asked for SENTENCES_COUNT sentences.
    """
    summarizer_classes = {
        'luhn': LuhnSummarizer,
        'edmundson': ESummarizer,
        'lsa': LsaSummarizer,
        'lex': LexSummarizer,
        'text': TextSummarizer,
        'sb': SumBasicSummarizer,
    }

    app.logger.info('Summarizing %s', url)
    # For plain text files the equivalent would be
    # PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE)).
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))

    try:
        summarizer_class = summarizer_classes.get(summarizer, KLSummarizer)
    except TypeError:
        # Unhashable key (e.g. a JSON list or object): fall back to the
        # default exactly as an unrecognized name does.
        summarizer_class = KLSummarizer

    stop_words = get_stop_words(LANGUAGE)
    chosen = summarizer_class(Stemmer(LANGUAGE))
    chosen.stop_words = stop_words
    if summarizer_class is ESummarizer:
        # The Edmundson method needs its three word sets and raises when it
        # is called without them. This is the same wiring sumy's
        # command-line tool uses.
        # NOTE(review): confirm against the installed sumy version.
        chosen.null_words = stop_words
        chosen.bonus_words = parser.significant_words
        chosen.stigma_words = parser.stigma_words
    app.logger.debug('Using summarizer %r', chosen)

    sentences = [
        str(sentence)
        for sentence in chosen(parser.document, SENTENCES_COUNT)
    ]
    app.logger.debug('Selected %d sentences', len(sentences))
    return sentences
if __name__ == '__main__':
    # The bind address and port come from the HOST and PORT environment
    # variables, defaulting to all interfaces on port 5000.
    app.run(
        host=os.environ.get("HOST", '0.0.0.0'),
        port=int(os.environ.get("PORT", 5000)),
    )