def getModelApi():
    """Build and return the two model-serving callables.

    Constructs one shared ``Preprocessor`` and one shared ``Summarizer``
    (vocab loaded from ``./vocab``, model dir ``./``) up front, then
    returns a ``(preprocessorApi, modelApi)`` pair of closures over them.
    """
    preprocessor = Preprocessing.Preprocessor()
    summarizer = Summarizer.Summarizer('./vocab', './')

    def preprocessorApi(article):
        # Tokenize, re-join with spaces, then re-split on the '*N*'
        # sentence sentinel before handing off to adjust_article.
        tokens = preprocessor.tokenize(article)
        joined = ' '.join(tokens)
        adjusted = preprocessor.adjust_article(joined.split('*N*'))
        # Caller receives UTF-8 bytes, not str.
        return adjusted.encode('utf-8')

    def modelApi(preprocessed_articles):
        # Thin delegation to the shared summarizer instance.
        return summarizer.summarize(preprocessed_articles)

    return preprocessorApi, modelApi
def index():
    """Flask view: predict redirect parameters for an uploaded Excel sheet.

    GET renders the upload form (``index.html``). POST reads the uploaded
    file ``file1``, cleans it with the project ``Preprocessor``, runs the
    pickled decision-tree model on it, and returns the merged input +
    prediction table as a CSV file download. Any failure is caught,
    printed, and reported to the user as a plain-text error string.
    """
    if request.method == 'POST':
        try:
            f = request.files['file1']
            df = pd.read_excel(f)
            # Strip stray spaces so column names match the training schema.
            df.columns = df.columns.str.replace(' ', '')
            # Columns the model was not trained on; dropped below.
            columns = [
                'id', 'MRBTS', "redirFreqCdma",
                "Item-redirGeranArfcnStructL-redirGeranArfcnPrio",
                'redirGeranArfcnStructL', 'redirGeranBandIndicator',
                'Item-redirGeranArfcnStructL-redirGeranArfcnValue'
            ]
            preprocessor = Preprocessing.Preprocessor()
            df = preprocessor.handle_missing_values(df)
            df = preprocessor.remove_columns(df, columns)
            filename1 = 'DTmodel_Pred_REDRTV1.sav'
            # Fix: open the model inside a context manager so the file
            # handle is closed even if unpickling raises (the original
            # pickle.load(open(...)) leaked the handle).
            # NOTE(review): pickle.load is only safe here because the
            # model file ships with the app, not from untrusted users.
            with open(filename1, 'rb') as model_file:
                loaded_model = pickle.load(model_file)
            prediction = loaded_model.predict(df)
            result = pd.DataFrame(prediction)
            result.columns = ['Pred']
            # Map integer class labels back to their categorical names.
            result = preprocessor.int_to_categorical(result)
            final_sheet = pd.merge(df, result,
                                   left_index=True, right_index=True)
            print('final sheet is prepared')
            # Serve the merged frame as a CSV attachment.
            resp = make_response(final_sheet.to_csv())
            resp.headers[
                "Content-Disposition"] = "attachment; filename=outputfile.csv"
            resp.headers["Content-Type"] = "text/csv"
            return resp
        except Exception as e:
            # Broad catch is deliberate: this is the top-level request
            # boundary, and the user gets a generic failure message.
            print('The Exception message is:', e)
            return 'Something is Wrong'
    else:
        return render_template('index.html')
# -*- coding:utf-8 -*-
from model import TextClassifier
import config
import Preprocessing
import pandas as pd
import numpy as np

if __name__ == '__main__':
    p = Preprocessing.Preprocessor()

    # Load the training, validation and test splits from the paths
    # configured in config.py.
    Train_raw_data = pd.read_csv(config.train_data_path)
    Validation_raw_data = pd.read_csv(config.validate_data_path)
    Test_raw_data = pd.read_csv(config.test_data_path)

    # For each split, clean the raw 'content' column and convert it into
    # model-ready sequences. Only the training pass runs with
    # train_flag=True; validation and test reuse its state.
    Train_sequences = p.preprocess_text(
        p.preprocess_content(Train_raw_data['content']), train_flag=True)
    Validation_sequences = p.preprocess_text(
        p.preprocess_content(Validation_raw_data['content']), train_flag=False)
    Test_sequences = p.preprocess_text(
        p.preprocess_content(Test_raw_data['content']), train_flag=False)

    # Pipeline outline (from the original notes):
    # 1. Split content and word tags
    # 2. Remove stop words
    # 3. Encode word tags
    # 4. Build the word dictionary and pad the sequences