예제 #1
0
from login.Email import sendemail
from login.UserInformation import changeimage, get_avatar
from match.match import Matcher as UtilMatch
from recommend.recommend import recom_qid
from util.util_logging import UtilLogging
from util.util_mysql import Users, UtilMysql
from util.util_parameter import UtilParameter
from util.util_web import get_args

# Flask application object plus login-manager wiring.
app = Flask(__name__)
# NOTE(review): the session secret key is hard-coded in source; consider
# loading it from configuration or the environment instead of committing it.
app.secret_key = "7cWjrCrxe2MR68HTLwVUWQ=="
login_manager = LoginManager()
login_manager.init_app(app)
# Name of the login view; presumably where unauthenticated users are sent --
# confirm against the LoginManager implementation in use.
login_manager.login_view = "login"

# Shared module-level services used by the request handlers.
parameter = UtilParameter()
# NOTE(review): the meaning of the three False flags is defined by
# UtilLogging and is not visible here -- confirm before changing them.
logger = UtilLogging(parameter, False, False, False)
# MySQL helper built from the "mysql" configuration entry and the logger.
mysql = UtilMysql(parameter.get_config("mysql"), logger)
cache = FileSystemCache('./.FILE/.cache')
classifier = Classifier()
# Maps short pinyin category keys to their Chinese category labels
# (study exchange, event notices, lost and found, second-hand for sale,
# second-hand wanted, mutual-help Q&A, recruitment/job seeking).
labeldict = {"xuexi": "学习交流", "huodong": "活动通知",
             "xunwu": "寻物招领", "chushou": "二手出售", "qiugou": "二手求购",
             "huzhu": "互助问答", "zhaopin": "招聘求职"}


@app.route('/')
def index():
    """
    Home page
    :return: index.html
    """
예제 #2
0
import util.util_tool as util_tool


def split_sent(paragraph):
    """Split a paragraph into fragments at sentence/clause punctuation.

    The text is cut at the Chinese full stop and at commas, exclamation
    marks, periods and question marks, in both ASCII and full-width forms.
    The delimiters themselves and any empty fragments are dropped.

    :param paragraph: text to split
    :return: list of non-empty fragments in their original order
    """
    # A character class is used instead of an alternation group: a bare `?`
    # alternative is not a valid regex ("nothing to repeat"), whereas inside
    # a class every punctuation mark is taken literally.
    delimiters = r'[。，,！!.？?]'
    # Splitting without a capture group never yields the delimiters, so only
    # empty fragments (adjacent or leading/trailing delimiters) need removal.
    return [piece for piece in re.split(delimiters, paragraph) if piece]


if __name__ == "__main__":

    # Set up parameters: read the "config_roberta_large" configuration and
    # take its PREDICT section as the working arguments.
    param = UParam()
    param.read_config_file("config_roberta_large")
    args = param.get_config(param.PREDICT)
    # Initialize logging
    logger = ULog(param)

    app_name = args["app_name"]
    # Constant definitions (the bare string below says the same in Chinese).
    '''
    常数定义
    '''
    # All names below are derived from app_name so each run is self-contained.
    file_name = "File_Directory/results/{}.json".format(app_name)
    new_data_name = "{}_re_predict_data".format(app_name)
    new_result_name = "{}_re_predict_out".format(app_name)
    final_result_name = "{}_final_out".format(app_name)
    # Re-prediction settings taken from the PREDICT configuration.
    threshold = args["re_predict_threshold"]
    mix_rate = args['re_predict_mix_rate']
예제 #3
0
import numpy as np
import time
import sys
from engine.train_for_multitask import TrainEngineForMT as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess_for_mt import ProcessorForMultiTask as PreProcess

from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool


if __name__ == "__main__":
    # Set up parameters: read the "config_roberta_large" configuration, then
    # hand the command-line arguments to set_config (presumably to override
    # values from the file -- confirm in UtilParameter).
    param = UParam()
    param.read_config_file("config_roberta_large")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)

    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")
예제 #4
0
import paddle.fluid as fluid
import numpy as np
import time
import sys
from engine.train_for_multitask import TrainEngineForMergeModel as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess_for_mt import ProcessorForMergeModel as PreProcess

from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool

if __name__ == "__main__":
    # Set up parameters: read the "config_test" configuration, then hand the
    # command-line arguments to set_config (presumably to override values
    # from the file -- confirm in UtilParameter).
    param = UParam()
    param.read_config_file("config_test")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)

    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")
예제 #5
0
import sys
from dataset.dataset_for_mrc_squad import DatasetForMrcSquad as Dataset
from preprocess.preprocess_for_mrc import PreprocessForMRCChinese as Preprocess
from model.network.mrc_net import MRCNet as Net
from engine.mrc_train_engine import MRCTrainEngine as TrainEngine
from engine.mrc_predict_engine import MRCPredictEngine as PredictEngine
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog

if __name__ == "__main__":
    # Set up parameters: read the "config_ernie" configuration, then hand the
    # command-line arguments to set_config (presumably to override values
    # from the file -- confirm in UtilParameter).
    param = UParam()
    param.read_config_file("config_ernie")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(args, params=param)
    app_name = args["app_name"]
    dataset_args = param.get_config(param.DATASET)

    # Preprocess the training data: read the source file into examples, then
    # run the preprocessor over those examples. Both steps are given a cache
    # name taken from the DATASET configuration.
    train_dataset = Dataset(dataset_args)
    train_dataset.read_from_srcfile(
        dataset_args['train_file_path'],
        cache=dataset_args['train_example_file_name'],
        is_training=True)
    train_preprocess = Preprocess(
        args=dataset_args,
        examples=train_dataset.get_examples(),
        cache=dataset_args['train_feature_file_name'])
    # NOTE(review): the variable name suggests do_preprocess() returns a data
    # generator -- confirm against PreprocessForMRCChinese.
    train_data_generator = train_preprocess.do_preprocess()
예제 #6
0
import paddle.fluid as fluid
import numpy as np
import time
import sys
from engine.train import TrainEngine as TrainEngine
from engine.predict import PredictEngine as PredictEngine
from data.Dataset import Dataset
from preprocess.preprocess import PreProcess

from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog
import util.util_tool as util_tool

if __name__ == "__main__":
    # Set up parameters: read the "config_roberta_large" configuration, then
    # hand the command-line arguments to set_config (presumably to override
    # values from the file -- confirm in UtilParameter).
    param = UParam()
    param.read_config_file("config_roberta_large")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging
    logger = ULog(param)

    app_name = args["app_name"]

    # corpus_cleaner = Corpus_cleaner()
    # # corpus_cleaner.read_from_json("pretrain_corpus.json")
    # corpus_cleaner.read_from_src()
    # docs = corpus_cleaner.get_docs()
    # for i in range(10):
    #     print(docs[i])
    #     print("###########################################################")