def upload():
    """Store the uploaded files, run the processing step and show the report.

    Every file posted under the ``file[]`` form field that passes
    ``allowed_file`` is saved into ``UPLOAD_FOLDER`` under the name returned
    by ``secure_filename``; other files are skipped without a message.
    ``prodata.process_file`` is then run over the configured folders.

    Returns:
        The rendered ``multi_files_upload.html`` template, which receives the
        directory listing of ``DOWNLOAD_FOLDER`` as ``filenames`` and the
        content of ``issues.txt`` from that folder as ``text``.

    Raises:
        OSError: if ``DOWNLOAD_FOLDER`` cannot be listed or ``issues.txt``
            cannot be opened.
    """
    upload_dir = app.config['UPLOAD_FOLDER']
    download_dir = app.config['DOWNLOAD_FOLDER']

    for uploaded in request.files.getlist("file[]"):
        # Only keep files whose extension is allowed.
        if uploaded and allowed_file(uploaded.filename):
            # Make the filename safe before using it as a path component.
            filename = secure_filename(uploaded.filename)
            uploaded.save(os.path.join(upload_dir, filename))

    prodata.process_file(
        path=upload_dir,
        company=app.config['COMPANY_FOLDER'],
        report=download_dir,
        history=app.config['ITSM_FOLDER'],
    )
    filenames = os.listdir(download_dir)

    # Read-only access is enough here; the context manager closes the handle
    # even if reading fails. UTF-8 matches the other revisions of this view.
    with open(download_dir + '/issues.txt', 'r', encoding='utf8') as issues_file:
        content = issues_file.read()

    return render_template('multi_files_upload.html',
                           filenames=filenames, text=content)
def upload():
    """Store the uploaded files, run the processing step and show the report.

    Every file posted under the ``file[]`` form field is checked with
    ``allowed_file``. The first file that fails the check aborts the request
    and re-renders ``multi_upload_index.html`` with an error message; files
    saved before that point stay in ``UPLOAD_FOLDER``.

    When ``UPLOAD_FOLDER`` contains at least one entry,
    ``prodata.process_file`` is run, the report folder is listed,
    ``issues.txt`` is read and ``COMPANY_NAME_FOLDER`` is removed.

    Returns:
        The rendered ``multi_upload_index.html`` template on a rejected file,
        otherwise the rendered ``multi_files_upload.html`` template with
        ``filenames``, ``text`` and ``msg2``.
    """
    msg2 = None
    # Defaults so the final render works even when nothing was processed;
    # previously these names were unbound if the upload folder was empty.
    filenames = []
    content = ''

    upload_dir = app.config['UPLOAD_FOLDER']
    download_dir = app.config['DOWNLOAD_FOLDER']

    for uploaded in request.files.getlist("file[]"):
        if uploaded and allowed_file(uploaded.filename):
            # Make the filename safe before using it as a path component.
            filename = secure_filename(uploaded.filename)
            uploaded.save(os.path.join(upload_dir, filename))
            msg2 = 'blabla'
        else:
            msg2 = 'Please select a valid extension (.xls or .xlsx)'
            return render_template('multi_upload_index.html', msg2=msg2)

    if os.listdir(upload_dir):
        prodata.process_file(
            path=upload_dir,
            company=app.config['COMPANY_NAME_FOLDER'],
            report=download_dir,
            history=app.config['ITSM_FOLDER'],
        )
        filenames = os.listdir(download_dir)
        # Read-only access is enough; the context manager closes the handle
        # even if reading fails.
        with open(download_dir + '/issues.txt', 'r', encoding='utf8') as issues_file:
            content = issues_file.read()
        shutil.rmtree(app.config['COMPANY_NAME_FOLDER'])

    return render_template('multi_files_upload.html',
                           filenames=filenames, text=content, msg2=msg2)
def upload():
    """Run the processing step for the current session and show the report.

    The base folder is read from ``session['filename']``; the ITSM, upload
    and report folders are fixed sub-paths of it. The company argument passed
    to ``process_data.process_file`` is the part of the base folder name
    before the first underscore.

    Returns:
        The rendered ``multi_files_upload.html`` template, which receives the
        listing of the report folder as ``filenames`` and the content of
        ``issues.txt`` as ``text`` (an empty string when that file does not
        exist, e.g. because nothing was processed).
    """
    id_folder = session['filename']
    itsm_folder = id_folder + '/ITSM_sites/'
    upload_folder = id_folder + '/Files_to_validate/'
    download_folder = id_folder + '/Report/'

    # exist_ok avoids a FileExistsError when this view is hit again for the
    # same session folder (page reload, second upload).
    os.makedirs(download_folder, exist_ok=True)

    if os.listdir(upload_folder):
        process_data.process_file(
            path=upload_folder,
            company=id_folder.split('_')[0],
            report=download_folder,
            history=itsm_folder,
        )

    filenames = os.listdir(download_folder)

    try:
        # Read-only access is enough; the context manager closes the handle
        # even if reading fails.
        with open(download_folder + 'issues.txt', 'r', encoding='utf8') as issues_file:
            content = issues_file.read()
    except FileNotFoundError:
        # No report was written, so there is nothing to show.
        content = ''

    return render_template('multi_files_upload.html',
                           filenames=filenames, text=content)
def test():
    """Restore the saved model, evaluate it on the test data and print metrics.

    Loads the test data with ``process_file``, restores the checkpoint at
    ``save_path`` into a fresh session, prints the loss/accuracy returned by
    ``evaluate``, then predicts in batches of 128 and prints scikit-learn
    style classification report and confusion matrix, followed by the
    elapsed time.

    Returns:
        None. All results are printed.
    """
    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

    # The context manager releases the session's resources when testing is
    # done (or fails); the session was previously never closed.
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=session, save_path=save_path)  # load the saved model

        print('Testing...')
        loss_test, acc_test = evaluate(session, x_test, y_test)
        msg = 'Test Loss: {0:>6.2}, Test Acc: {1:>7.2%}'
        print(msg.format(loss_test, acc_test))

        batch_size = 128
        data_len = len(x_test)
        num_batch = int((data_len - 1) / batch_size) + 1

        # Index of the maximum along axis 1 of the labels
        # (presumably one-hot encoded -- confirm against process_file).
        y_test_cls = np.argmax(y_test, 1)
        y_pred_cls = np.zeros(shape=len(x_test), dtype=np.int32)  # holds the predictions
        for i in range(num_batch):  # process batch by batch
            start_id = i * batch_size
            end_id = min((i + 1) * batch_size, data_len)
            feed_dict = {
                model.input_x: x_test[start_id:end_id],
                model.keep_prob: 1.0  # keep probability 1.0 while predicting
            }
            y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls, feed_dict=feed_dict)

    # Evaluation
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))

    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
from tqdm import tqdm, trange import pickle import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler from torch.utils.data.distributed import DistributedSampler from tokenization_unilm import UnilmTokenizer, WhitespaceTokenizer from modeling_unilm import UnilmForSeq2SeqDecode, UnilmConfig from process_data import process_file, make_conll_format, TEST_FILE, test_data import utils_seq2seq import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" test_sentence_all, test_question_all, test_answers_all = process_file( TEST_FILE) make_conll_format(test_sentence_all, test_question_all, test_answers_all, test_data) ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (UnilmConfig, )), ()) MODEL_CLASSES = {'unilm': (UnilmConfig, UnilmForSeq2SeqDecode, UnilmTokenizer)} logging.basicConfig( format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO) my_logger = logging.getLogger(__name__) def detokenize(tk_list):
def train():
    """Train the model with periodic validation and early stopping.

    Sets up TensorBoard summaries and a checkpoint saver, loads the training
    data with ``process_file`` and splits it into training and validation
    parts with ``train_test_split``. Then it iterates over
    ``config.num_epochs`` epochs:

    * every ``config.save_per_batch`` batches a summary is written;
    * every ``config.print_per_batch`` batches the training and validation
      loss/accuracy are printed, and the model is saved to ``save_path`` when
      the validation accuracy beats the best value so far;
    * training stops early once more than 1000 batches pass without an
      improvement of the validation accuracy.

    Returns:
        None. Progress is printed; checkpoints and summaries go to disk.
    """
    print("Configuring TensorBoard and Saver...")
    # Configure TensorBoard. When retraining, delete the tensorboard folder
    # first, otherwise the new graphs are drawn over the old ones.
    tensorboard_dir = 'tensorboard/textcnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the Saver
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Loading training and validation data...")
    # Load the training set and split off the validation set
    start_time = time.time()
    train_x, train_y = process_file(train_dir, word_to_id, cat_to_id, config.seq_length)
    x_train, x_val, y_train, y_val = train_test_split(train_x, train_y)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Create the session
    session = tf.Session()
    try:
        session.run(tf.global_variables_initializer())
        writer.add_graph(session.graph)

        print('Training and evaluating...')
        start_time = time.time()
        total_batch = 0  # number of batches processed so far
        best_acc_val = 0.0  # best validation accuracy so far
        last_improved = 0  # batch index of the last improvement
        require_improvement = 1000  # stop early after this many batches without improvement

        stop_early = False
        for epoch in range(config.num_epochs):
            print('Epoch:', epoch + 1)
            batch_train = batch_iter(x_train, y_train, config.batch_size)
            for x_batch, y_batch in batch_train:
                feed_dict = feed_data(x_batch, y_batch, config.keep_prob)

                if total_batch % config.save_per_batch == 0:
                    # Write the training summary to TensorBoard
                    s = session.run(merged_summary, feed_dict=feed_dict)
                    writer.add_summary(s, total_batch)

                if total_batch % config.print_per_batch == 0:
                    # Report performance on the training batch and the
                    # validation set. Use a copy of the feed so that the
                    # keep probability of 1.0 does not leak into the
                    # training step below.
                    eval_feed = dict(feed_dict)
                    eval_feed[model.keep_prob] = 1.0
                    loss_train, acc_train = session.run([model.loss, model.acc],
                                                        feed_dict=eval_feed)
                    loss_val, acc_val = evaluate(session, x_val, y_val)  # todo

                    if acc_val > best_acc_val:
                        # Keep the best result
                        best_acc_val = acc_val
                        last_improved = total_batch
                        saver.save(sess=session, save_path=save_path)
                        improved_str = '*'
                    else:
                        improved_str = ''

                    time_dif = get_time_dif(start_time)
                    msg = ('Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},'
                           ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}')
                    print(msg.format(total_batch, loss_train, acc_train,
                                     loss_val, acc_val, time_dif, improved_str))

                session.run(model.train_op, feed_dict=feed_dict)  # run the optimisation step
                total_batch += 1

                if total_batch - last_improved > require_improvement:
                    # Validation accuracy has not improved for a long time
                    print("No optimization for a long time, auto-stopping...")
                    stop_early = True
                    break  # leave the batch loop
            if stop_early:  # leave the epoch loop as well
                break
    finally:
        # Flush pending summaries to disk and release the session even when
        # training is interrupted by an exception.
        writer.close()
        session.close()