import os
import re
from collections import Counter

from bottle import request, template
from janome.tokenizer import Tokenizer

import word_match  # project-local module that flags words to filter out


def do_analysis(analyzed_file):
    """Tokenize the uploaded file with Janome and append the kept tokens to a result file."""
    result_file_name = "result/" + str(analyzed_file)
    os.makedirs(os.path.dirname(result_file_name), exist_ok=True)  # mirror the upload path under result/
    # Janome renders a token as "surface<TAB>part-of-speech,...". Substituting this
    # pattern with '' strips everything from the first whitespace on, leaving the surface form.
    p = re.compile(r'\s(.*)')
    t = Tokenizer()
    with open(analyzed_file, mode='r', encoding='utf-8') as read_file:
        texts = read_file.read()
    with open(result_file_name, mode='a', encoding='utf-8') as result_file:
        for token in t.tokenize(texts):
            check_word = p.sub('', str(token))
            if not word_match.word_check(check_word):
                result_file.write(str(token) + "\n")
    return result_file_name
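
# do_analysis depends on word_match.word_check, which is defined elsewhere in
# the project and not shown in this section. A minimal stand-in sketch, assuming
# word_check returns True for words that should be excluded (e.g. Japanese stop
# words); the real module's logic and word list may differ:
def word_check_sketch(word, _stop_words=frozenset({'の', 'は', 'が', 'を', 'に', 'と', 'で'})):
    # True means "filter this word out of the results".
    return word in _stop_words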
def do_upload():
    """Bottle handler: save an uploaded file, analyze it, and render the result template."""
    p = re.compile(r'\s(.*)')
    category = request.forms.get('category')
    upload = request.files.get('upload')
    name, ext = os.path.splitext(upload.filename)
    if ext.lower() not in ('.csv', '.json', '.txt'):
        return 'File extension not allowed.'
    save_path = get_save_path_for_category(category)
    # With a directory as the destination, Bottle appends upload.filename itself;
    # overwrite=True replaces any existing file of the same name.
    upload.save(save_path, True)
    analyzed_file = os.path.join(save_path, upload.filename)
    result_file_path = do_analysis(analyzed_file)
    with open(result_file_path, mode='r', encoding='utf-8') as read_file:
        texts = read_file.readlines()
    # Count identical token lines, then report "surface : count" for kept words.
    counter = Counter(texts)
    word_count = []
    for word, cnt in counter.most_common():
        check_word = p.sub('', word)
        if word_match.word_check(check_word):
            continue  # filtered words may still appear if the result file predates the filter
        word_count.append(check_word + " : " + str(cnt))
    return template("result", url=url, result_file=result_file_path,
                    result_text=texts, word_count=word_count)
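
# do_upload calls get_save_path_for_category and passes a `url` helper to the
# template; neither is defined in this section, so both are assumed to live
# elsewhere in the project. A minimal sketch of the save-path helper, assuming
# uploads are grouped into per-category directories under upload/:
def get_save_path_for_category_sketch(category):
    save_path = os.path.join("upload", category or "uncategorized")  # hypothetical layout
    os.makedirs(save_path, exist_ok=True)  # ensure the directory exists before upload.save()
    return save_path

# One plausible way to wire the handler into Bottle; the route path here is an
# assumption, not taken from this section:
#
#   @bottle.route('/upload', method='POST')
#   def upload_route():
#       return do_upload()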