-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
91 lines (62 loc) · 2.47 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
from bottle import route, run, template, static_file,url,request
from janome.tokenizer import Tokenizer
from collections import Counter
import os
import re
import word_match
# read "css","javascript"
# Serve static assets (CSS / JavaScript) out of the ./static directory.
@route('/static/<filepath:path>', name='static_file')
def static(filepath):
    """Return the requested asset file from ./static."""
    asset = static_file(filepath, root='./static')
    return asset
# Catch-all route: serve template/view files from ./view.
# NOTE(review): this redefines the module-level name `static` and reuses
# the route name 'static_file' from the asset handler above. Bottle still
# registers both routes at decoration time, so requests are served, but
# url('static_file') becomes ambiguous — consider giving this route a
# distinct function and route name.
@route('/<filepath:path>', name='static_file')
def static(filepath):
    """Return the requested file from the ./view directory."""
    page = static_file(filepath, root="./view/")
    return page
# Top page.
@route('/')
def index():
    """Render the index template, exposing bottle's url() helper to it."""
    return template("index", url=url)
@route('/upload', method='POST')
def do_upload():
    """Accept a text-file upload, run morphological analysis on it, and
    render the result page with per-word frequency counts.

    Form fields:
        category -- optional sub-directory name under tmp/ to save into
        upload   -- the uploaded file (.csv, .json or .txt only)

    Returns the rendered "result" template, or a plain error string when
    the file extension is not allowed.
    """
    # Pattern strips everything from the first whitespace onward, leaving
    # only the surface form of a janome token line ("surface\tfeatures...").
    surface_only = re.compile(r'\s(.*)')
    category = request.forms.get('category')
    upload = request.files.get('upload')
    ext = os.path.splitext(upload.filename)[1]
    # Case-insensitive whitelist: '.TXT' is as acceptable as '.txt'
    # (the original check rejected upper-case extensions).
    if ext.lower() not in ('.csv', '.json', '.txt'):
        return 'File extension not allowed.'
    save_path = get_save_path_for_category(category)
    # Ensure the category directory exists; saving into a missing
    # tmp/<category>/ directory would otherwise raise IOError.
    os.makedirs(save_path, exist_ok=True)
    upload.save(save_path, True)  # bottle appends upload.filename automatically
    analyzed_file = save_path + upload.filename
    result_file_path = do_analysis(analyzed_file)
    with open(result_file_path, mode='r', encoding='utf-8') as read_file:
        texts = read_file.readlines()
    # One token per line; Counter yields a frequency per distinct line.
    counter = Counter(texts)
    word_count = []
    for word, cnt in counter.most_common():
        check_word = surface_only.sub('', word)
        # Skip stop-words / non-content tokens flagged by word_match.
        if word_match.word_check(check_word):
            continue
        word_count.append(check_word + " : " + str(cnt))
    return template("result", url=url, result_file=result_file_path,
                    result_text=texts, word_count=word_count)
def get_save_path_for_category(category):
    """Return the directory (with trailing slash) an upload is saved into.

    A missing or empty category maps to the current directory "./";
    otherwise the file goes under "tmp/<category>/".

    NOTE(review): `category` comes from an untrusted form field; a value
    such as '../..' would escape tmp/ — sanitize upstream if that matters.
    """
    # request.forms.get() returns None when the field is absent; treat
    # that the same as an empty string (the original '== ""' check let
    # None through and produced a bogus "tmp/None/" path).
    if not category:
        return "./"
    return "tmp/" + str(category) + "/"
def do_analysis(analyzed_file):
    """Tokenize `analyzed_file` with janome and write one token per line
    to a mirror path under result/, skipping tokens rejected by
    word_match.word_check.

    Returns the path of the result file that was written.
    """
    result_file_name = "result/" + str(analyzed_file)
    # Keep only the token's surface form when passing it to word_check.
    surface_only = re.compile(r'\s(.*)')
    tokenizer = Tokenizer()
    with open(analyzed_file, mode='r', encoding='utf-8') as read_file:
        texts = read_file.read()
    # The upload may live under tmp/<category>/, so the mirrored
    # result/tmp/<category>/ directory must exist before writing.
    os.makedirs(os.path.dirname(result_file_name) or '.', exist_ok=True)
    # 'w' rather than 'a': re-analyzing the same file must not append a
    # second copy of every token, which would double the frequency
    # counts computed by the caller.
    with open(result_file_name, mode='w', encoding='utf-8') as result_file:
        for token in tokenizer.tokenize(str(texts)):
            check_word = surface_only.sub('', str(token))
            if not word_match.word_check(check_word):
                result_file.write(str(token) + "\n")
    return result_file_name
# Development server settings: bottle's built-in server with debug error
# pages and the auto-reloader enabled. Both should be turned off (and a
# production WSGI server used) when deploying.
run(host='localhost', port=8081, debug=True, reloader=True)