from cachelib import FileSystemCache  # assumed source of FileSystemCache; import was missing
from flask import Flask, render_template
from flask_login import LoginManager

from classify.classify import Classifier  # assumed project-local module path; import was missing
from login.Email import sendemail
from login.UserInformation import changeimage, get_avatar
from match.match import Matcher as UtilMatch
from recommend.recommend import recom_qid
from util.util_logging import UtilLogging
from util.util_mysql import Users, UtilMysql
from util.util_parameter import UtilParameter
from util.util_web import get_args

app = Flask(__name__)
app.secret_key = "7cWjrCrxe2MR68HTLwVUWQ=="

login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = "login"

parameter = UtilParameter()
logger = UtilLogging(parameter, False, False, False)
mysql = UtilMysql(parameter.get_config("mysql"), logger)
cache = FileSystemCache('./.FILE/.cache')
classifier = Classifier()

# Mapping from post-category slugs to their Chinese display labels.
labeldict = {"xuexi": "学习交流",     # study & discussion
             "huodong": "活动通知",   # event notices
             "xunwu": "寻物招领",     # lost & found
             "chushou": "二手出售",   # second-hand for sale
             "qiugou": "二手求购",    # second-hand wanted
             "huzhu": "互助问答",     # mutual-help Q&A
             "zhaopin": "招聘求职"}   # jobs & recruiting


@app.route('/')
def index():
    """
    Home page.
    :return: index.html
    """
    return render_template("index.html")  # assumed from the docstring above
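
# A minimal sketch of how this app could be launched for local development,
# assuming the standard Flask entry-point pattern; the host and port values
# are illustrative, not taken from the project config.
#
#     if __name__ == "__main__":
#         app.run(host="127.0.0.1", port=5000, debug=False)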
import re

import util.util_tool as util_tool
from util.util_logging import UtilLogging as ULog        # used below; import was missing
from util.util_parameter import UtilParameter as UParam  # used below; import was missing


def split_sent(paragraph):
    # Split on Chinese/ASCII sentence delimiters; the capture group keeps the
    # delimiters as separate list items so they can be filtered out below.
    # (The original pattern had a bare unescaped "?" branch, which is invalid
    # in a regex alternation; a character class avoids the escaping entirely.)
    delimiters = r'([。,,!!.??])'
    sents = re.split(delimiters, paragraph)
    res = []
    for sent in sents:
        # Drop empty fragments and the delimiter tokens themselves.
        if len(sent) != 0 and not re.match(delimiters, sent):
            res.append(sent)
    return res


if __name__ == "__main__":
    # Set up parameters
    param = UParam()
    param.read_config_file("config_roberta_large")
    args = param.get_config(param.PREDICT)
    # Initialize logging
    logger = ULog(param)
    app_name = args["app_name"]

    # Constant definitions
    file_name = "File_Directory/results/{}.json".format(app_name)
    new_data_name = "{}_re_predict_data".format(app_name)
    new_result_name = "{}_re_predict_out".format(app_name)
    final_result_name = "{}_final_out".format(app_name)
    threshold = args["re_predict_threshold"]
    mix_rate = args['re_predict_mix_rate']
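
# A quick sanity check for split_sent, assuming the module is imported from
# elsewhere; the module name below is hypothetical.
#
#     from re_predict import split_sent
#     print(split_sent("今天天气很好。我们去跑步吧!"))
#     # -> ['今天天气很好', '我们去跑步吧']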
import numpy as np
import time
import sys

from engine.train_for_multitask import TrainEngineForMT as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess_for_mt import ProcessorForMultiTask as PreProcess
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool

if __name__ == "__main__":
    # Set up parameters
    param = UParam()
    param.read_config_file("config_roberta_large")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)
    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")
import paddle.fluid as fluid
import numpy as np
import time
import sys

from engine.train_for_multitask import TrainEngineForMergeModel as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess_for_mt import ProcessorForMergeModel as PreProcess
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool

if __name__ == "__main__":
    # Set up parameters
    param = UParam()
    param.read_config_file("config_test")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)
    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")
import sys

from dataset.dataset_for_mrc_squad import DatasetForMrcSquad as Dataset
from preprocess.preprocess_for_mrc import PreprocessForMRCChinese as Preprocess
from model.network.mrc_net import MRCNet as Net
from engine.mrc_train_engine import MRCTrainEngine as TrainEngine
from engine.mrc_predict_engine import MRCPredictEngine as PredictEngine
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog

if __name__ == "__main__":
    # Set up parameters
    param = UParam()
    param.read_config_file("config_ernie")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(args, params=param)
    app_name = args["app_name"]

    dataset_args = param.get_config(param.DATASET)
    # Preprocess the training data: read raw examples from the source file,
    # then convert them into model features via the MRC preprocessor.
    train_dataset = Dataset(dataset_args)
    train_dataset.read_from_srcfile(
        dataset_args['train_file_path'],
        cache=dataset_args['train_example_file_name'],
        is_training=True)
    train_preprocess = Preprocess(
        args=dataset_args,
        examples=train_dataset.get_examples(),
        cache=dataset_args['train_feature_file_name'])
    train_data_generator = train_preprocess.do_preprocess()
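
    # A sketch of the matching dev-set pass, mirroring the training calls
    # above; the 'dev_*' config keys are assumptions, not confirmed by this
    # file or its config.
    #
    #     dev_dataset = Dataset(dataset_args)
    #     dev_dataset.read_from_srcfile(
    #         dataset_args['dev_file_path'],                 # assumed key
    #         cache=dataset_args['dev_example_file_name'],   # assumed key
    #         is_training=False)
    #     dev_preprocess = Preprocess(
    #         args=dataset_args,
    #         examples=dev_dataset.get_examples(),
    #         cache=dataset_args['dev_feature_file_name'])   # assumed key
    #     dev_data_generator = dev_preprocess.do_preprocess()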
import paddle.fluid as fluid
import numpy as np
import time
import sys

from engine.train import TrainEngine as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess import PreProcess
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool

if __name__ == "__main__":
    # Set up parameters
    param = UParam()
    param.read_config_file("config_roberta_large")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)
    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")