-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
60 lines (51 loc) · 1.87 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from flask import Flask, render_template, request
from flask.ext.sqlalchemy import SQLAlchemy
import os
import requests
import operator
import re
import nltk
from stop_words import stops
from collections import Counter
from bs4 import BeautifulSoup
# Create the Flask application object for this module.
app = Flask(__name__)
# Load configuration from the object named by the APP_SETTINGS environment
# variable; if the variable is unset this raises KeyError while the module
# is being imported.
app.config.from_object(os.environ['APP_SETTINGS'])
# Explicitly set SQLALCHEMY_TRACK_MODIFICATIONS to False, overriding whatever
# the loaded settings object provided.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# Database handle bound to the app; the index view uses `db.session`.
db = SQLAlchemy(app)
# NOTE(review): imported here instead of at the top of the file, presumably
# because `models` needs `db` to exist first -- confirm against models.py.
from models import Result
# A token counts as a word only if it contains at least one ASCII letter;
# this drops pure punctuation and bare numbers produced by the tokenizer.
_HAS_LETTER = re.compile(r'[A-Za-z]')
# Extra search location for NLTK data files, relative to the working directory.
_NLTK_DATA_DIR = './nltk/data'
# Upper bound (seconds) on how long a request worker may wait for the
# remote page, so a stalled server cannot hang the worker indefinitely.
_FETCH_TIMEOUT_SECONDS = 10


def _count_words(raw):
    """Tokenize *raw* and return ``(all_words, non_stop_words)`` Counters."""
    # Register the data directory once; appending on every request would
    # grow nltk.data.path for the lifetime of the process.
    if _NLTK_DATA_DIR not in nltk.data.path:
        nltk.data.path.append(_NLTK_DATA_DIR)
    tokens = nltk.word_tokenize(raw)
    raw_words = [w for w in tokens if _HAS_LETTER.search(w)]
    no_stop_words = [w for w in raw_words if w.lower() not in stops]
    return Counter(raw_words), Counter(no_stop_words)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the index page and, on POST, count the words at a submitted URL.

    GET renders ``index.html`` with empty ``errors`` and ``results``.

    POST reads the ``url`` form field, downloads that page, strips the
    markup, and counts the words in the remaining text. The counts are
    stored as a ``Result`` row, and the template receives ``results``: a
    list of ``(word, count)`` pairs with stop words removed, sorted by
    count in descending order.

    Failures are reported through the ``errors`` list passed to the
    template rather than raised:

    * ``'Unable to get URL'`` when the form field is missing or the
      download fails (including a timeout); the page is then rendered
      without ``results``.
    * ``'Unable to add item to database'`` when saving fails; the computed
      ``results`` are still rendered.

    A response with an error status (4xx/5xx) is skipped without an error
    message, leaving ``results`` empty.
    """
    errors = []
    results = {}
    if request.method != 'POST':
        return render_template('index.html', errors=errors, results=results)

    try:
        url = request.form['url']
        response = requests.get(url, timeout=_FETCH_TIMEOUT_SECONDS)
    except Exception:
        # Best-effort boundary: any failure to obtain the page becomes a
        # user-visible message, but the cause is logged instead of lost.
        app.logger.exception('Failed to fetch the submitted URL')
        errors.append('Unable to get URL')
        return render_template('index.html', errors=errors)

    # `ok` is False for 4xx/5xx responses; those are skipped as before.
    if response.ok:
        raw = BeautifulSoup(response.text, 'html.parser').get_text()
        raw_words_count, no_stop_words_count = _count_words(raw)
        results = sorted(no_stop_words_count.items(),
                         key=operator.itemgetter(1), reverse=True)
        try:
            result = Result(url=url, result_all=raw_words_count,
                            results_no_stop_words=no_stop_words_count)
            db.session.add(result)
            db.session.commit()
        except Exception:
            # Discard the failed transaction so the session is usable again.
            db.session.rollback()
            app.logger.exception('Failed to store word counts')
            errors.append('Unable to add item to database')
    return render_template('index.html', errors=errors, results=results)
@app.route('/<name>')
def hello_name(name):
    """Return a greeting for the ``name`` segment captured from the URL path.

    ``name`` comes straight from the request URL and the returned string is
    sent back as the response body, so it is HTML-escaped first to keep
    markup in the URL from being interpreted by the browser.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from html import escape

    return "Hello {}!".format(escape(name))
# Start the app with app.run() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()