Python process_file示例，process_data.process_file Python示例

示例#1

0

显示文件

文件： application.py 项目： mva001/Data-Validation

def upload():
    """Store the uploaded files, run the validation, and render the results.

    Every file posted under the ``file[]`` form field that passes
    ``allowed_file`` is saved into ``UPLOAD_FOLDER``. ``prodata.process_file``
    is then run over the configured folders, and the result page is rendered
    with the entries of ``DOWNLOAD_FOLDER`` and the text of ``issues.txt``.

    Returns:
        The response produced by ``render_template`` for
        ``multi_files_upload.html``.
    """
    for upload_item in request.files.getlist("file[]"):
        # Entries that are empty or have a disallowed name are skipped.
        if upload_item and allowed_file(upload_item.filename):
            # Sanitise the client-supplied name before using it in a path.
            filename = secure_filename(upload_item.filename)
            upload_item.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

    prodata.process_file(path=os.path.join(app.config['UPLOAD_FOLDER']),
                         company=app.config['COMPANY_FOLDER'],
                         report=os.path.join(app.config['DOWNLOAD_FOLDER']),
                         history=os.path.join(app.config['ITSM_FOLDER']))
    filenames = os.listdir(app.config['DOWNLOAD_FOLDER'])

    # The report is only read, so open it read-only; the context manager
    # closes the handle even if read() raises.
    issues_path = os.path.join(app.config['DOWNLOAD_FOLDER'], 'issues.txt')
    with open(issues_path, 'r') as issues_file:
        content = issues_file.read()

    # Render a page that links to each generated file and shows the issues.
    return render_template('multi_files_upload.html', filenames=filenames, text=content)

示例#2

0

显示文件

文件： application.py 项目： mva001/dvmva

def upload():
    """Store the uploaded files, run the validation, and render the results.

    Each file posted under ``file[]`` must pass ``allowed_file``; the first
    one that does not aborts the request and re-renders the upload form with
    an error message. If ``UPLOAD_FOLDER`` then contains anything,
    ``prodata.process_file`` is run, ``issues.txt`` is read from
    ``DOWNLOAD_FOLDER``, and ``COMPANY_NAME_FOLDER`` is removed.

    Returns:
        The response produced by ``render_template``: the upload form on an
        invalid file, otherwise ``multi_files_upload.html``.
    """
    msg2 = None
    # Defaults so the final render still works when nothing was processed
    # (previously these names were unbound if the upload folder was empty).
    filenames = []
    content = ''

    for upload_item in request.files.getlist("file[]"):
        if not (upload_item and allowed_file(upload_item.filename)):
            msg2 = 'Please select a valid extension (.xls or .xlsx)'
            return render_template('multi_upload_index.html', msg2=msg2)
        # Sanitise the client-supplied name before using it in a path.
        filename = secure_filename(upload_item.filename)
        upload_item.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        msg2 = 'blabla'

    if os.listdir(app.config['UPLOAD_FOLDER']):
        prodata.process_file(path=os.path.join(app.config['UPLOAD_FOLDER']),
                             company=app.config['COMPANY_NAME_FOLDER'],
                             report=os.path.join(app.config['DOWNLOAD_FOLDER']),
                             history=os.path.join(app.config['ITSM_FOLDER']))
        filenames = os.listdir(app.config['DOWNLOAD_FOLDER'])

        # Read-only access is enough; the context manager closes the handle
        # even if read() raises.
        issues_path = os.path.join(app.config['DOWNLOAD_FOLDER'], 'issues.txt')
        with open(issues_path, 'r', encoding='utf8') as issues_file:
            content = issues_file.read()
        shutil.rmtree(app.config['COMPANY_NAME_FOLDER'])

    # Render a page that links to each generated file and shows the issues.
    return render_template('multi_files_upload.html',
                           filenames=filenames, text=content, msg2=msg2)

示例#3

0

显示文件

文件： application.py 项目： tabs11/Data_Validation

def upload():
    """Validate the files of the current session folder and render the report.

    The working directory is taken from ``session['filename']``; its
    ``Files_to_validate/``, ``ITSM_sites/`` and ``Report/`` sub-folders are
    passed to ``process_data.process_file`` as input, history and output
    locations. The company argument is the part of the folder name before the
    first underscore.

    Returns:
        The response produced by ``render_template`` for
        ``multi_files_upload.html``.
    """
    ID_FOLDER = session['filename']
    ITSM_FOLDER = ID_FOLDER + '/ITSM_sites/'
    UPLOAD_FOLDER = ID_FOLDER + '/Files_to_validate/'
    DOWNLOAD_FOLDER = ID_FOLDER + '/Report/'
    # Do not fail when the report folder is left over from an earlier call
    # for the same session folder.
    os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

    # Defaults so the final render still works when there was nothing to
    # process (previously these names were unbound in that case).
    filenames = []
    content = ''

    if os.listdir(UPLOAD_FOLDER):
        process_data.process_file(path=UPLOAD_FOLDER,
                                  company=ID_FOLDER.split('_')[0],
                                  report=DOWNLOAD_FOLDER,
                                  history=ITSM_FOLDER)
        filenames = os.listdir(DOWNLOAD_FOLDER)
        # Read-only access is enough; the context manager closes the handle
        # even if read() raises.
        with open(DOWNLOAD_FOLDER + 'issues.txt', 'r', encoding='utf8') as issues_file:
            content = issues_file.read()

    return render_template('multi_files_upload.html',
                           filenames=filenames,
                           text=content)

示例#4

0

显示文件

文件： run_cnn.py 项目： cc1018/-

def test():
    """Evaluate the saved model on the test set and print the results.

    Loads the test data with ``process_file``, restores the checkpoint at
    ``save_path`` into a fresh session, prints the overall loss and accuracy
    from ``evaluate``, then predicts class ids batch by batch and prints a
    classification report, a confusion matrix, and the elapsed time.
    """
    print("Loading test data...")
    started_at = time.time()
    features, labels = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    restorer = tf.train.Saver()
    restorer.restore(sess=sess, save_path=save_path)  # load the saved model

    print('Testing...')
    test_loss, test_acc = evaluate(sess, features, labels)
    summary_fmt = 'Test Loss: {0:>6.2}, Test Acc: {1:>7.2%}'
    print(summary_fmt.format(test_loss, test_acc))

    batch_size = 128
    data_len = len(features)
    num_batch = int((data_len - 1) / batch_size) + 1

    # Ground-truth class ids: argmax along axis 1 of the label array.
    true_cls = np.argmax(labels, 1)
    # Buffer that receives the predicted class ids.
    pred_cls = np.zeros(shape=data_len, dtype=np.int32)
    # Predict one batch at a time, with keep_prob fed as 1.0.
    for batch_no in range(num_batch):
        lo = batch_no * batch_size
        hi = min(lo + batch_size, data_len)
        batch_feed = {
            model.input_x: features[lo:hi],
            model.keep_prob: 1.0
        }
        pred_cls[lo:hi] = sess.run(model.y_pred_cls, feed_dict=batch_feed)

    # Per-class evaluation.
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(true_cls, pred_cls, target_names=categories))

    # Confusion matrix.
    print("Confusion Matrix...")
    confusion = metrics.confusion_matrix(true_cls, pred_cls)
    print(confusion)

    elapsed = get_time_dif(started_at)
    print("Time usage:", elapsed)

示例#5

0

显示文件

from tqdm import tqdm, trange
import pickle
import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler
from torch.utils.data.distributed import DistributedSampler

from tokenization_unilm import UnilmTokenizer, WhitespaceTokenizer

from modeling_unilm import UnilmForSeq2SeqDecode, UnilmConfig
from process_data import process_file, make_conll_format, TEST_FILE, test_data
import utils_seq2seq

import os
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Module-level side effect: parse the test file and pass the parsed parts,
# together with ``test_data``, to ``make_conll_format``.
test_sentence_all, test_question_all, test_answers_all = process_file(TEST_FILE)
make_conll_format(
    test_sentence_all,
    test_question_all,
    test_answers_all,
    test_data,
)

# Keys of the pretrained-config archive map of the only supported config class.
ALL_MODELS = tuple(UnilmConfig.pretrained_config_archive_map.keys())
# Model type name -> (config class, model class, tokenizer class).
MODEL_CLASSES = {
    'unilm': (UnilmConfig, UnilmForSeq2SeqDecode, UnilmTokenizer),
}

logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)
my_logger = logging.getLogger(__name__)


def detokenize(tk_list):

示例#6

0

显示文件

文件： run_cnn.py 项目： cc1018/-

def train():
    """Train the model with periodic validation, checkpointing and early stop.

    Sets up TensorBoard summaries and a saver, loads the training data with
    ``process_file`` and splits it into training and validation parts, then
    iterates over ``config.num_epochs`` epochs of mini-batches. Every
    ``config.save_per_batch`` batches a summary is written; every
    ``config.print_per_batch`` batches training and validation metrics are
    printed and the model is saved to ``save_path`` when validation accuracy
    improves. Training stops early once more than 1000 batches pass without
    an improvement.
    """
    print("Configuring TensorBoard and Saver...")
    # Configure TensorBoard. When retraining, delete the tensorboard folder
    # first, otherwise the new curves are drawn over the old ones.
    tensorboard_dir = 'tensorboard/textcnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the Saver.
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Loading training and validation data...")
    # Load the data and split it into training and validation sets.
    start_time = time.time()
    train_x, train_y = process_file(train_dir, word_to_id, cat_to_id, config.seq_length)
    x_train, x_val, y_train, y_val = train_test_split(train_x, train_y)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Create the session.
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    print('Training and evaluating...')
    start_time = time.time()
    total_batch = 0  # number of batches processed so far
    best_acc_val = 0.0  # best validation accuracy seen so far
    last_improved = 0  # batch index of the last improvement
    require_improvement = 1000  # stop early after this many batches without improvement

    flag = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = feed_data(x_batch, y_batch, config.keep_prob)

            if total_batch % config.save_per_batch == 0:
                # Periodically write the training summaries to TensorBoard.
                s = session.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, total_batch)

            if total_batch % config.print_per_batch == 0:
                # Periodically report performance on the current training
                # batch and on the validation set. The metrics are computed
                # with keep_prob = 1.0 on a *copy* of the feed dict, so the
                # optimisation step below still uses the training keep_prob
                # (the original overwrote it in place).
                eval_feed = dict(feed_dict)
                eval_feed[model.keep_prob] = 1.0
                loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=eval_feed)
                loss_val, acc_val = evaluate(session, x_val, y_val)  # todo

                if acc_val > best_acc_val:
                    # Keep the best result so far.
                    best_acc_val = acc_val
                    last_improved = total_batch
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str))

            session.run(model.train_op, feed_dict=feed_dict)  # run the optimisation step
            total_batch += 1

            if total_batch - last_improved > require_improvement:
                # Validation accuracy has not improved for a long time:
                # stop training early.
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break  # leave the batch loop
        if flag:  # also leave the epoch loop
            break