def long_task_train(config: dict):
    """Run the full training pipeline for one robot/version as a background task.

    Pipeline (in execution order, step numbers kept from the original comments):
      1. mark the DB row status=2 (training) and stamp START_TIME;
      2. dump the robot config (``Config(config).dumps()``) in a worker thread,
         heart-beating TRAINING_TIME while it runs;
      3. train the sentence-BERT similarity model via a subprocess command
         (the older intent/similarity trainers below are commented out);
      4. re-dump the config including sentence-BERT post-processing;
      5. upload config + model artifacts to the SFTP server;
      6. mark the row status=0 (done), then delete all but the two newest
         trained versions of each robot from local disk.
    On any exception the row is set to status=3 (failed) with the error text.

    :param config: training payload dict; must contain "robotId" and "version".
    """
    conn = pool.connection()  # borrow a DB connection from the pool for this task
    cur = conn.cursor()
    try:
        # step 1: mark the task as "training" (status=2) in the DB.
        robot_id = config["robotId"]
        version_id = config["version"]
        # t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # NOTE(review): SQL is built with f-strings throughout this module;
        # values come from our own DB/Mongo rows, but parameterized queries
        # would still be safer — TODO confirm and migrate.
        sql = f'UPDATE {TABLE_NAME} SET status=2,UPDATED_AT=NOW(),START_TIME=NOW() ' \
              f'WHERE robot_id="{robot_id}" and version_id="{version_id}" and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
        print(sql), logging.info(sql)
        index = cur.execute(sql)
        conn.commit()
        print(index), logging.info(index)

        def do_update_mysql():
            # Heartbeat: refresh TRAINING_TIME so the watchdog in
            # interval_sql_train() does not flag this task as stalled.
            # t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            sql_ = f'UPDATE {TABLE_NAME} SET TRAINING_TIME=NOW(),UPDATED_AT=NOW() ' \
                   f'WHERE robot_id="{robot_id}" and version_id="{version_id}" and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
            # print(sql), logging.info(sql)
            index = cur.execute(sql_)
            conn.commit()

        # step 3: train the intent model — currently disabled.
        # _ = "start train intent ..."
        # print(_), logging.info(_)
        # command_0 = f'CUDA_VISIBLE_DEVICES=0 {PYTHON_BIN_PATH} -m train_intent -r {robot_id} -v {version_id}'
        # thread_0 = threading.Thread(target=do_thread, args=('1', command_0))
        # thread_0.start()
        # while True:
        #     if not thread_0.is_alive():
        #         break
        #     do_update_mysql()
        #     time.sleep(20)

        @time_spend
        def do_robot_config1():
            # Pre-process the robot config file so it can later be loaded into memory.
            try:
                print("开始第一次载入pickle")
                c = Config(config)
                print("开始第一次dumppickle")
                c.dumps()
            except:
                traceback.print_exc()

        _ = "start dumps robot config ..."
        print(_), logging.info(_)
        thread_ = threading.Thread(target=do_robot_config1, )
        thread_.start()
        # Poll the worker thread, heart-beating the DB every 10s until it finishes.
        while True:
            if not thread_.is_alive():
                break
            do_update_mysql()
            time.sleep(10)

        # step 4: train the similarity model (legacy path below is disabled).
        _ = "start train similarity ..."
        print(_), logging.info(_)
        # os.system("source activate baili")
        # command_1 = f'CUDA_VISIBLE_DEVICES=0 {PYTHON_BIN_PATH} -m train_similarity -r {robot_id} -v {version_id}'
        # # command_1 = f'CUDA_VISIBLE_DEVICES=0 conda activate baili && {PYTHON_BIN_PATH} -m train_similarity -r {robot_id} -v {version_id}'
        # thread_1 = threading.Thread(target=do_thread, args=('1', command_1))
        # thread_1.start()
        # while True:
        #     if not thread_1.is_alive():
        #         break
        #     do_update_mysql()
        #     time.sleep(20)

        # Train the sentence-BERT similarity model in a subprocess, watched by a thread.
        _ = "start train sentence bert ..."
        print(_), logging.info(_)
        command_2 = f'CUDA_VISIBLE_DEVICES=0 {PYTHON_BIN_PATH} -m train_similarity_sentenceBERT -r {robot_id} -v {version_id}'
        print(command_2)
        thread_2 = threading.Thread(target=do_thread, args=('1', command_2))
        thread_2.start()
        while True:
            if not thread_2.is_alive():
                break
            do_update_mysql()
            time.sleep(20)

        # step 2: re-run config preprocessing, now folding in the sentence-BERT artifacts.
        @time_spend
        def do_robot_config2():
            # Pre-process the robot config file so it can later be loaded into memory.
            c = Config(config)
            c.process_sentence_bert()
            c.dumps()

        _ = "start dumps robot config ..."
        print(_), logging.info(_)
        thread_ = threading.Thread(target=do_robot_config2, )
        thread_.start()
        while True:
            if not thread_.is_alive():
                break
            do_update_mysql()
            time.sleep(10)

        # Drop the cached entry for the old version of this robot/version pair.
        redis.delete(f"robot_{robot_id}_version_{version_id}_v2")

        # step 5: upload config and models to the SFTP server.
        _ = "start upload sftp ..."
        print(_), logging.info(_)

        @time_spend
        def upload_to_sftp():
            # The training box is isolated, so artifacts are shipped out via SFTP.
            sftp = SftpServer(HOST, USER, PASSWORD, PORT)
            sftp.put_file(f'config_models/robot_{robot_id}_version_{version_id}.model',
                          f"./{ORIGIN_PATH}/config_models/robot_{robot_id}_version_{version_id}.model")
            # Upload each trained model directory that exists on disk.
            if os.path.exists(f'config_models/robot_{robot_id}_version_{version_id}_intent'):
                _ = "上传意图模型"
                print(_), logging.info(_)
                os.chdir(__PATH__)
                sftp.put_dir(f'config_models/robot_{robot_id}_version_{version_id}_intent',
                             f"./{ORIGIN_PATH}/config_models/robot_{robot_id}_version_{version_id}_intent")
                _ = "上传模型配置完成, 上传相似度模型"
                print(_), logging.info(_)
            if os.path.exists(f'config_models/robot_{robot_id}_version_{version_id}_similarity'):
                os.chdir(__PATH__)
                sftp.put_dir(f'config_models/robot_{robot_id}_version_{version_id}_similarity',
                             f"./{ORIGIN_PATH}/config_models/robot_{robot_id}_version_{version_id}_similarity")
                _ = "上传相似度模型完成"
                print(_), logging.info(_)
                os.chdir(__PATH__)
            if os.path.exists(f'config_models/robot_{robot_id}_version_{version_id}_similarity_sentbert'):
                os.chdir(__PATH__)
                sftp.put_dir(f'config_models/robot_{robot_id}_version_{version_id}_similarity_sentbert',
                             f"./{ORIGIN_PATH}/config_models/robot_{robot_id}_version_{version_id}_similarity_sentbert")
                _ = "上传senenceBert相似度模型完成"
                print(_), logging.info(_)
                os.chdir(__PATH__)
            sftp.close()

        thread_3 = threading.Thread(target=upload_to_sftp, )
        thread_3.start()
        while True:
            if not thread_3.is_alive():
                break
            do_update_mysql()
            time.sleep(20)

        # step 3 (final): mark success AFTER the upload, so status=0 always
        # implies the model files are already available on the SFTP side.
        robot_id = config["robotId"]
        version_id = config["version"]
        t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        sql = f'UPDATE {TABLE_NAME} SET status=0,UPDATED_AT=NOW(),END_TIME=NOW() ' \
              f'WHERE robot_id="{robot_id}" and version_id="{version_id}" and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
        print(sql), logging.info(sql)
        index = cur.execute(sql)
        conn.commit()
        print(index), logging.info(index)
        _ = "训练完成,更新到数据库"
        print(_), logging.info(_)

        # Query which versions have trained successfully on this machine ...
        sql = f"SELECT robot_id,version_id,status FROM {TABLE_NAME} " \
              f"where status=0 and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
        cur.execute(sql)
        result_ = cur.fetchall()
        data = []
        for i in result_:
            data.append({
                "robot_id": i[0],
                "version": i[1],
                "status": i[2],
            })
        # Group version numbers per robot id.
        robot_versions = {}
        for i in data:
            robot_versions[i["robot_id"]] = robot_versions.get(i["robot_id"], []) + [int(i["version"])]
        _ = robot_versions
        print(_), logging.info(_)
        # ... then delete everything on local disk except each robot's two newest versions.
        for robot_id, versions in robot_versions.items():
            versions = list(set(versions))
            versions = sorted(versions, key=lambda x: int(x), reverse=True)
            # print(versions)
            top2versions = versions[:2]
            for version_id in versions:
                if version_id in top2versions:
                    pass
                else:
                    if os.path.exists(f"./config_models/robot_{robot_id}_version_{version_id}.model"):
                        _ = f"remove old version, robot: {robot_id}, version: {version_id}"
                        print(_), logging.info(_)
                        # Best-effort cleanup: each artifact may or may not exist.
                        try:
                            shutil.rmtree(f"./config_models/robot_{robot_id}_version_{version_id}_intent")
                        except:
                            pass
                        try:
                            shutil.rmtree(f"./config_models/robot_{robot_id}_version_{version_id}_similarity")
                        except:
                            pass
                        try:
                            shutil.rmtree(f"./config_models/robot_{robot_id}_version_{version_id}_similarity_sentbert")
                        except:
                            pass
                        try:
                            os.remove(f"./config_models/robot_{robot_id}_version_{version_id}.model")
                        except:
                            pass
                    else:
                        pass
        cur.close()
        conn.close()
    except Exception as e:
        # On any failure: log it and set the row to status=3 (failed) with the message.
        traceback.print_exc()
        _ = "error: {}".format(repr(e))
        print(_), logging.info(_)
        robot_id = config["robotId"]
        version_id = config["version"]
        t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        sql = f'UPDATE {TABLE_NAME} SET status=3,STATUS_MESSAGE="{_}",UPDATED_AT=NOW(),END_TIME=NOW() ' \
              f'WHERE robot_id="{robot_id}" and version_id="{version_id}" and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
        print(sql), logging.info(sql)
        index = cur.execute(sql)
        conn.commit()
        print(index), logging.info(index)
        traceback.print_exc()
        cur.close()
        conn.close()
def train_similarity_sentenceBERT(robot_id, version):
    """Train the sentence-BERT similarity model for one robot/version.

    (The original docstring said "intent model", but the body trains a
    SentenceTransformer similarity model and writes SIMILARITY_RESULT.)

    Loads the pickled ``Config`` for the robot/version, prepares train/dev
    example pairs, fine-tunes a pretrained SentenceTransformer with cosine
    similarity loss, saves it to ``config_models/..._similarity_sentbert``,
    and records a (currently hard-coded) score JSON into both the
    SIMILARITY_RESULT and INTENT_RESULT columns of the task table.

    :param robot_id: robot identifier (used in file paths and SQL).
    :param version: version identifier (used in file paths and SQL).
    """
    max_seq_length = 24
    batch_size = 128
    labels = ["0", "1"]
    # After repeated testing with Dr. Lan: the bert-tiny variant would not
    # learn at all across many hyperparameter combinations (epochs, learning
    # rate, batch size, ...).
    # pretrain_name = "bert-tiny"
    # The HIT (Harbin Institute of Technology) checkpoint does learn:
    # pretrain_name = "roberta_wwm_ext_3"
    # Tested: a sentence-bert pretrained similarity model converges faster;
    # with little test data accuracy was 100% either way, so no conclusion.
    pretrain_name = "distiluse-base-multilingual-cased-v2"
    train_dir = "train_files"
    # Location of the initial pretrained weights.
    pretrain_path = f"pretrained_models/{pretrain_name}"
    path = f"config_models/robot_{robot_id}_version_{version}.model"
    print("model_path")
    print(path)
    if os.path.exists(pretrain_path):
        _ = f"start train sentence_bert model, robot_id: {robot_id}, version:{version} "
        print(_), logging.info(_)
        # NOTE(review): pickle.load of a file written by our own pipeline;
        # unsafe only if the model dir could contain untrusted data.
        c: Config = pickle.load(open(path, "rb"))
        temp_dir = f"{train_dir}/robot_{robot_id}_version_{version}_sentbert"
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        examples_train, examples_dev = prepare_csv_data(c, temp_dir)
        # pretrain_path='/data4/azun/project_dialout/pretrained_models/distiluse-base-multilingual-cased-v2'
        print(pretrain_path)
        print("训练集")
        print(len(examples_train))
        print("测试集")
        print(len(examples_dev))
        # Cap the dataset sizes to bound training time.
        # NOTE(review): dev is truncated to 4000 when it exceeds 5000 —
        # looks like an intentional margin, but confirm the 5000/4000 mismatch.
        if (len(examples_train) > 50000):
            examples_train = examples_train[:50000]
        if (len(examples_dev) > 5000):
            examples_dev = examples_dev[:4000]
        # ------------------------------------------------------------------
        # Fine-tune the sentence transformer with cosine-similarity loss.
        model = SentenceTransformer(pretrain_path)
        train_dataset = SentencesDataset(examples_train, model)
        train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)
        train_loss = losses.CosineSimilarityLoss(model)
        model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=1, warmup_steps=100)
        model.save(
            f"config_models/robot_{robot_id}_version_{version}_similarity_sentbert"
        )
        print("模型保存成功,地址是:")
        print(
            f"config_models/robot_{robot_id}_version_{version}_similarity_sentbert"
        )
        # ------------------------------------------------------------------
        # Placeholder scores — the dev set here is too small to be meaningful.
        result = {"train": 0.921, "dev": 0.932}
        # command = f"cp {pretrain_path}/bert_config.json config_models/robot_{robot_id}_version_{version}_similarity"
        # os.system(command)
        # command = f"cp {pretrain_path}/vocab.txt config_models/robot_{robot_id}_version_{version}_similarity"
        # os.system(command)
        # Report the scores back to the database.
        conn = pool.connection()  # borrow a DB connection from the pool
        cur = conn.cursor()
        try:
            similarity_result = json.dumps(result, ensure_ascii=False)
            sql_ = f"UPDATE {TABLE_NAME} SET SIMILARITY_RESULT='{similarity_result}',UPDATED_AT=NOW() " \
                   f"WHERE robot_id='{robot_id}' and version_id='{version}' and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
            print(sql_)
            index = cur.execute(sql_)
            conn.commit()
        except Exception as e:
            print(repr(e))
            pass
        finally:
            cur.close()
            conn.close()
        # Also mirror the result into INTENT_RESULT — kept only to stay
        # compatible with the old backend; remove once the backend is updated.
        conn = pool.connection()  # borrow a DB connection from the pool
        cur = conn.cursor()
        try:
            similarity_result = json.dumps(result, ensure_ascii=False)
            sql_ = f"UPDATE {TABLE_NAME} SET INTENT_RESULT='{similarity_result}',UPDATED_AT=NOW() " \
                   f"WHERE robot_id='{robot_id}' and version_id='{version}' and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
            print(sql_)
            index = cur.execute(sql_)
            conn.commit()
        except Exception as e:
            print(repr(e))
            pass
        finally:
            cur.close()
            conn.close()
        print(result)
    else:
        # Pretrained weights missing on this machine — nothing to train from.
        _ = f"can not found, robot_id: {robot_id}, version:{version} "
        print(_), logging.info(_)
def interval_sql_train():
    """Periodic scheduler tick: process robot deletions, watchdog stalled
    training jobs, and start the next queued training task.

    Since the database is hit frequently, a connection pool is used.
    Status codes:
        0 - training finished
        1 - waiting in queue
        2 - training in progress
        3 - training failed
    """
    # Phase 1: honor pending robot deletions broadcast via redis.
    delete_robot = redis.get("delete_robot")
    if delete_robot:
        delete_robot = str(delete_robot.decode())
        robots = delete_robot.split(";")
        for robot in robots:
            # Take the robot offline in the model manager (best effort).
            try:
                res = requests.post(f"{MODEL_MANAGER_HOST}/api/delete_robot", json={"robotId": robot}).json()
                _ = f"{robot} : {res}"
                print(_), logging.info(_)
            except:
                pass
            # The commands below remove the local model files
            # (dirs first, then the remaining flat files).
            command = f"rm -r ./config_models/robot_{robot}_version*"
            os.system(command)
            command = f"rm ./config_models/robot_{robot}_version*"
            os.system(command)
        # Deliberately NOT deleting the redis key: other nodes still need to
        # process it; repeated processing here is harmless.
        # redis.delete("delete_robot")
    else:
        pass
    conn = pool.connection()  # borrow a DB connection from the pool
    cur = conn.cursor()
    # Phase 2: is any task currently training?
    sql = f"select robot_id,version_id,TRAINING_TIME from {TABLE_NAME} " \
          f"where status=2 and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
    cur.execute(sql)
    result = cur.fetchall()
    if len(result):
        row = result[0]
        _ = "robot:{}, version:{} is in training....".format(row[0], row[1])
        print(_), logging.info(_)
        # Watchdog: if the heartbeat (TRAINING_TIME) is older than 3 minutes,
        # the trainer is assumed dead/stuck and the task is failed.
        # datetime.strptime(row[2], "%Y-%m-%d %H:%M:%S")
        sql = f'UPDATE {TABLE_NAME} SET status=3,UPDATED_AT=NOW() ' \
              f'WHERE TRAINING_TIME<SUBDATE(now(),interval 3 minute) and status=2 ' \
              f'and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
        print(sql), logging.info(sql)
        index = cur.execute(sql)
        conn.commit()
        print(index), logging.info(index)
    else:
        # Phase 3: no active training — start the oldest queued task, if any.
        sql = f"select robot_id,version_id,es_id,status from {TABLE_NAME} " \
              f"where status=1 and DELETE_FLAG=0 and CLUSTER='{CLUSTER}' order by CREATED_AT"
        cur.execute(sql)
        result = cur.fetchall()
        if len(result):
            row = result[0]
            _ = "robot:{}, version:{} is starting....".format(row[0], row[1])
            print(_), logging.info(_)
            try:
                # 2020-12-05: switched from Elasticsearch to MongoDB for
                # fetching the training payload (row[2] holds the Mongo id).
                mongo_result = collection.find({"_id": ObjectId(row[2])})
                print(row[2])
                # res = get_data_by_id(row[2])
                # config = json.loads(res)
                config = mongo_result[0]
                # (The original source carried a pasted sample DB row here —
                #  es id/link, result JSONs, timestamps, audit columns, and a
                #  sample ObjectId — kept as commentary only; it was never code.)
                # Run the training task on the background executor.
                executor.submit(long_task_train, config)
            except Exception as e:
                # Payload fetch / submission failed: mark the task failed.
                _ = "error: {}".format(repr(e))
                print(_), logging.info(_)
                traceback.print_exc()
                robot_id = row[0]
                version_id = row[1]
                # t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                sql = f'UPDATE {TABLE_NAME} SET status=3,STATUS_MESSAGE="{_}",UPDATED_AT=NOW(),END_TIME=NOW() ' \
                      f'WHERE robot_id="{robot_id}" and version_id="{version_id}" ' \
                      f'and DELETE_FLAG=0 and CLUSTER="{CLUSTER}";'
                print(sql), logging.info(sql)
                index = cur.execute(sql)
                conn.commit()
        else:
            _ = "no task..."
            print(_), logging.info(_)
    cur.close()
    conn.close()
def fun():
    """Delete a robot (all versions) — closure over outer ``robot_id``.

    Refuses if any version is currently training (-12). Otherwise soft-deletes
    all rows (DELETE_FLAG=1, IS_ONLINE=0), asks the model manager to take the
    robot offline, removes it from the "online_robot_versions" redis cache,
    and appends it to the short-lived "delete_robot" redis key so every node's
    interval job removes the local model files.

    Returns an API-style dict: code 0 on success, -11 if the robot does not
    exist (still broadcast for file cleanup), -12 if training, -1 on error.
    """
    _ = f"机器编号:{robot_id}"
    print(_), logging.info(_)
    conn = pool.connection()  # borrow a DB connection from the pool
    cur = conn.cursor()
    try:
        # step-1: does the robot exist at all?
        sql = f"SELECT robot_id,version_id,status " \
              f"FROM {TABLE_NAME} " \
              f"where robot_id='{robot_id}' and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
        cur.execute(sql)
        result_ = cur.fetchall()
        if len(result_):
            # Refuse deletion while any version of this robot is training.
            sql = f"SELECT robot_id,version_id,status " \
                  f"FROM {TABLE_NAME} " \
                  f"where robot_id='{robot_id}' and status=2 and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
            cur.execute(sql)
            result_ = cur.fetchall()
            if len(result_):
                return {"code": -12, "msg": "机器人正在训练中...", "data": {}}
            # Soft-delete every version and take them offline.
            t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            sql = f"UPDATE {TABLE_NAME} SET DELETE_FLAG=1, IS_ONLINE=0, UPDATED_AT=NOW() " \
                  f"where robot_id='{robot_id}' and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
            print(sql), logging.info(sql)
            index = cur.execute(sql)
            conn.commit()
            print(index), logging.info(index)
            # Ask the model manager to unload the robot (best effort).
            try:
                res = requests.post(
                    f"{MODEL_MANAGER_HOST}/api/delete_robot",
                    json={
                        "robotId": robot_id
                    }).json()
                _ = f"{robot_id} : {res}"
                print(_), logging.info(_)
            except:
                pass
            # Keep redis in sync: drop this robot from the online-versions cache.
            online_robot_versions = redis.get("online_robot_versions")
            if online_robot_versions:
                online_robot_versions = str(online_robot_versions.decode())
                now_online_robot_versions = []
                for rv in online_robot_versions.split(";"):
                    r = rv.split(":")[0]
                    if r == str(robot_id):
                        continue
                    else:
                        now_online_robot_versions.append(rv)
                # Reset the currently-online version list.
                redis.set("online_robot_versions", ";".join(now_online_robot_versions))
                _ = "now online robot_version:" + str(
                    redis.get("online_robot_versions").decode())
                print(_), logging.info(_)
            # Broadcast the deletion (TTL 60s) so every node's interval job
            # removes the robot's local model files.
            delete_robot = redis.get("delete_robot")
            if delete_robot:
                delete_robot = str(delete_robot.decode())
                delete_robot += ";" + str(robot_id)
                redis.set("delete_robot", delete_robot)
                redis.expire("delete_robot", 60)
            else:
                delete_robot = str(robot_id)
                redis.set("delete_robot", delete_robot)
                redis.expire("delete_robot", 60)
            print(redis.get("delete_robot"))
            return {"code": 0, "msg": "机器删除成功!", "data": {}}
        else:
            # Robot not in DB — still broadcast the deletion so any stray
            # local model files get cleaned up on every node.
            delete_robot = redis.get("delete_robot")
            if delete_robot:
                delete_robot = str(delete_robot.decode())
                delete_robot += ";" + str(robot_id)
                redis.set("delete_robot", delete_robot)
                redis.expire("delete_robot", 60)
            else:
                delete_robot = str(robot_id)
                redis.set("delete_robot", delete_robot)
                redis.expire("delete_robot", 60)
            print(redis.get("delete_robot"))
            return {"code": -11, "data": {}, "msg": "机器不存在或已删除!"}
    except Exception as e:
        traceback.print_exc()
        _ = repr(e)
        print(_), logging.info(_)
        return {"code": -1, "data": {}, "msg": _}
        pass
    finally:
        # Always release the pooled connection, whichever branch returned.
        cur.close()
        conn.close()
def fun():
    """Publish (bring online) one trained robot version — closure over outer
    ``robot_id`` and ``version``.

    Rejects versions that are queued/training/failed (-12). Warms the model
    in the model manager (querying first, then poking the predict endpoint to
    trigger a load if needed), flips IS_ONLINE so only this version is online,
    and refreshes the "online_robot_versions" redis cache.

    Returns an API-style dict: code 0 on success, -11 if the robot/version
    does not exist, -12 on bad status or publish failure, -1 on error.
    """
    _ = f"机器编号:{robot_id},版本编号:{version}"
    print(_), logging.info(_)
    conn = pool.connection()  # borrow a DB connection from the pool
    cur = conn.cursor()
    try:
        # step-1: does this robot/version exist?
        sql = f"SELECT robot_id,version_id,status " \
              f"FROM {TABLE_NAME} " \
              f"where robot_id='{robot_id}' and version_id='{version}' and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
        cur.execute(sql)
        result_ = cur.fetchall()
        if len(result_):
            # Only status 0 (trained) may be published.
            if result_[0][2] == "1":
                return {"code": -12, "msg": "机器人还在排队中...", "data": {}}
            elif result_[0][2] == "2":
                return {"code": -12, "msg": "机器人正在训练中...", "data": {}}
            elif result_[0][2] == "3":
                return {"code": -12, "msg": "机器人该版本训练失败了_-_", "data": {}}
            # Prepare to update the database.
            t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Bring the model online in the model manager.
            try:
                res_ = requests.post(
                    f"{MODEL_MANAGER_HOST}/api/model_query",
                    json={
                        "robotId": robot_id,
                        "version": version
                    }).json()
                if int(res_["data"]["intent_model"]) > 0:
                    # The model is already loaded — skip the warm-up call.
                    print("intent:", res_["data"]["intent_model"])
                    pass
                else:
                    # Poke the predict endpoint: in "deep" mode this loads the
                    # model; in "fast" mode it returns -1 (harmless either way).
                    _ = f"push model {robot_id}-{version} online.."
                    print(_), logging.info(_)
                    # payload_intent = {"modelName": "intent_model",
                    #                   "robotId": robot_id,
                    #                   "version": version,
                    #                   "text": "你好?", }
                    # payload_similar = {"modelName": "similar_model",
                    #                    "robotId": robot_id,
                    #                    "version": version,
                    #                    "text_as": ["今天天气", "今天天气"],
                    #                    "text_bs": ["杭州疫情", "今天天气不错"]}
                    payload_similar = {
                        "modelName": "sentence_bert",
                        "robotId": robot_id,
                        "version": version,
                        "text_as": "好的"
                    }
                    url = f"{MODEL_MANAGER_HOST}/api/model_predict"
                    # requests.post(url=url, json=payload_intent).json()
                    requests.post(url=url, json=payload_similar).json()
                    pass
            except Exception as e:
                _ = repr(e)
                print(_), logging.info(_)
                return {"code": -12, "msg": "机器人该版本发布失败了_-_", "data": {}}
            # Set this version online (1) and every other version offline (0).
            sql1 = f"UPDATE {TABLE_NAME} SET IS_ONLINE=1,UPDATED_AT=NOW() " \
                   f"where robot_id='{robot_id}' and version_id='{version}' " \
                   f"and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
            sql2 = f"UPDATE {TABLE_NAME} SET IS_ONLINE=0,UPDATED_AT=NOW() " \
                   f"where robot_id='{robot_id}' and version_id !='{version}' " \
                   f"and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
            index1 = cur.execute(sql1)
            index2 = cur.execute(sql2)
            print(sql1 + sql2), logging.info(sql1 + sql2)
            conn.commit()
            print(index1, index2), logging.info(index1 + index2)
            # Cache the full set of online robot versions in redis to spare
            # repeated DB lookups elsewhere.
            sql = f"SELECT robot_id,version_id " \
                  f"FROM {TABLE_NAME} " \
                  f"where IS_ONLINE=1 and CLUSTER='{CLUSTER}' and DELETE_FLAG=0;"
            cur.execute(sql)
            result_ = cur.fetchall()
            if len(result_):
                online_robot_versions = []
                for row in result_:
                    online_robot_versions.append(row[0] + ":" + row[1])
                _ = ";".join(online_robot_versions)
                print(_), logging.info(_)
                redis.set("online_robot_versions", ";".join(online_robot_versions))
                print(redis.get("online_robot_versions"))
            return {"code": 0, "msg": "机器发布成功!", "data": {}}
        else:
            return {"code": -11, "data": {}, "msg": "机器不存在或已删除!"}
    except Exception as e:
        traceback.print_exc()
        _ = repr(e)
        print(_), logging.info(_)
        return {"code": -1, "data": {}, "msg": _}
    finally:
        # Always release the pooled connection.
        cur.close()
        conn.close()
def fun():
    """Query the training status of one robot version — closure over outer
    ``robot_id`` and ``version``.

    Maps the DB status column to an API response:
      0 -> "running" (or "old version deleted" if not among the 2 newest),
      1 -> "queue" with the position ahead of this robot,
      2 -> "training",
      3 -> "error" with STATUS_MESSAGE.
    Returns an API-style dict; -9 if the robot/version does not exist,
    -10 for an out-of-range status code.
    """
    # More statuses still need to be added here.
    _ = "【查询模型状态】机器编号:{}, 版本编号:{}".format(robot_id, version)
    print(_), logging.info(_)
    conn = pool.connection()  # borrow a DB connection from the pool
    cur = conn.cursor()
    """
    状态码:(由于设计,队列也更改为训练中)
    0 - 训练完成
    1 - 在队列中
    2 - 在训练中
    3 - 训练失败
    """
    # First check whether this robot/version exists.
    sql = f"select robot_id,version_id,status,STATUS_MESSAGE,created_at,start_time,end_time,INTENT_RESULT,SIMILARITY_RESULT,IS_ONLINE " \
          f"from {TABLE_NAME} " \
          f"where robot_id='{robot_id}' and version_id='{version}' and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
    cur.execute(sql)
    res_ = cur.fetchall()
    if len(res_):
        row = res_[0]
        # config_ = get_data(robot_id, row[1])
        # print(row[7])
        # Parse the stored result JSONs, tolerating NULL-ish markers.
        intent_res = {}
        if str(row[7]) in ["", "NULL", "null", "None", "none"]:
            pass
        else:
            intent_res = json.loads(str(row[7]).strip())
        similar_res = {}
        if str(row[8]) in ["", "NULL", "null", "None", "none"]:
            pass
        else:
            similar_res = json.loads(str(row[8]).strip())
        # Intent score intentionally excluded from the reported accuracy.
        # NOTE(review): both addends are similar_res["dev"], so this equals
        # round(similar_res["dev"], 2) — confirm whether one term should be
        # intent_res or "train".
        acc_res = round(
            (similar_res.get("dev", 0.0) + similar_res.get("dev", 0.0)) / 2, 2)
        # config_ = get_data_by_id(row[7])
        # config_ = json.loads(config_)
        # train_type = config_.get("train_type", "deep")
        if str(row[2]) == "0":
            # Trained: report "running" only if this is one of the robot's
            # two newest trained versions (older ones are deleted from disk).
            sql = f"select robot_id,version_id,status,STATUS_MESSAGE,created_at,start_time,end_time " \
                  f"from {TABLE_NAME} " \
                  f"where robot_id='{robot_id}'and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
            cur.execute(sql)
            res__ = cur.fetchall()
            versions = [int(_[1]) for _ in res__ if str(_[2]) == "0"]
            # Newest trained versions first.
            versions = sorted(versions, key=lambda x: x, reverse=True)
            print(versions), logging.info(versions)
            if int(row[1]) in versions[:2]:
                result_ = {
                    "code": 0,
                    "msg": "ok!",
                    "data": {
                        "status": "running",
                        "statusMessage": "正在运作中...",
                        "result": acc_res,
                        "version": int(row[1]),
                        "createTime": str(row[4]).strip("None"),
                        "startTime": str(row[5]).strip("None"),
                        "endTime": str(row[6]).strip("None"),
                        "isOnline": bool(row[9])
                    }
                }
            else:
                result_ = {
                    "code": 0,
                    "msg": "ok!",
                    "data": {
                        "status": "running",
                        "statusMessage": "训练成功,但已删除(老版本)...",
                        "result": acc_res,
                        "version": int(row[1]),
                        "createTime": str(row[4]).strip("None"),
                        "startTime": str(row[5]).strip("None"),
                        "endTime": str(row[6]).strip("None"),
                        "isOnline": bool(row[9])
                    }
                }
        elif str(row[2]) == "1":
            # Queued: count how many queued tasks sit ahead of this one.
            sql = f"select robot_id,version_id,status " \
                  f"from {TABLE_NAME} " \
                  f"where status=1 and DELETE_FLAG=0 and CLUSTER='{CLUSTER}' order by CREATED_AT"
            cur.execute(sql)
            robot_status = cur.fetchall()
            count = 0
            for row_ in robot_status:
                count += 1
                if str(row_[0]) == str(robot_id) and str(
                        row_[1]) == str(version):
                    break
            result_ = {
                "code": 0,
                "msg": "ok!",
                "data": {
                    "status": "queue",
                    "statusMessage": f"前面还有{count}个机器人在排队中...",
                    "result": acc_res,
                    "version": int(row[1]),
                    "createTime": str(row[4]).strip("None"),
                    "startTime": str(row[5]).strip("None"),
                    "endTime": str(row[6]).strip("None"),
                    "isOnline": bool(row[9])
                }
            }
        elif str(row[2]) == "2":
            result_ = {
                "code": 0,
                "msg": "ok!",
                "data": {
                    "status": "training",
                    "statusMessage": "正在训练中",
                    "result": acc_res,
                    "version": int(row[1]),
                    "createTime": str(row[4]).strip("None"),
                    "startTime": str(row[5]).strip("None"),
                    "endTime": str(row[6]).strip("None"),
                    "isOnline": bool(row[9])
                }
            }
        elif str(row[2]) == "3":
            result_ = {
                "code": 0,
                "msg": "ok!",
                "data": {
                    "status": "error",
                    "statusMessage": str(row[3]),
                    "version": int(row[1]),
                    "result": acc_res,
                    "createTime": str(row[4]).strip("None"),
                    "startTime": str(row[5]).strip("None"),
                    "endTime": str(row[6]).strip("None"),
                    "isOnline": bool(row[9])
                }
            }
        else:
            result_ = {"code": -10, "data": {}, "msg": "状态码异常,不在范围内!"}
    else:
        result_ = {"code": -9, "data": {}, "msg": "机器和版本不存在!"}
        # {"code": 0, "msg": "ok!", "data": {"status": "error", "statusMessage": "机器不存在"}}
    cur.close()
    conn.close()
    return result_
def fun():
    """Create a training task for a robot version — closure over outer
    ``robot_id``, ``version``, ``questions``, ``intents``, ``slots``.

    Under a redis distributed lock: rejects duplicates (-3), enforces the
    15-robot limit (-4), stores the training payload in MongoDB, and inserts
    a status=1 (queued) row into the task table for interval_sql_train()
    to pick up. Returns an API-style dict; code 0 on success.
    """
    # robot_id=1, version=1 by default (fixed version).
    _ = f"机器编号:{robot_id}, 版本编号:{version}"
    print(_), logging.info(_)
    conn = pool.connection()  # borrow a DB connection from the pool
    cur = conn.cursor()
    identifier = False
    lock_name = f"lock_name_{robot_id}_{version}"
    try:
        # Acquire the distributed lock; 1s acquire window so concurrent
        # callers fail fast instead of queueing.
        identifier = acquire_lock(lock_name, acquire_time=1, time_out=10)
        if not identifier:
            return {
                "code": -2,
                "data": {},
                "msg": f"{robot_id}, {version}并发锁异常!"
            }
        # step-1: reject if this robot/version already has a task row.
        sql = f"SELECT robot_id,version_id,status " \
              f"FROM {TABLE_NAME} " \
              f"where robot_id='{robot_id}' and version_id='{version}' and DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
        cur.execute(sql)
        result_ = cur.fetchall()
        if len(result_):
            # print(result_[0])
            return {"code": -3, "data": {}, "msg": "机器和版本已存在!"}
        # Enforce the robot-count limit (new robots only).
        sql = f"SELECT robot_id,version_id,status " \
              f"FROM {TABLE_NAME} " \
              f"where DELETE_FLAG=0 and CLUSTER='{CLUSTER}';"
        cur.execute(sql)
        result_ = cur.fetchall()
        data = []
        for i in result_:
            data.append({
                "robot_id": i[0],
                "version": i[1],
                "status": i[2],
            })
        robot_versions = {}
        for i in data:
            robot_versions[i["robot_id"]] = robot_versions.get(
                i["robot_id"], []) + [int(i["version"])]
        _ = f"当前机器版本:{robot_versions}"
        print(_), logging.info(_)
        if len(robot_versions) >= 15 and robot_id not in robot_versions:
            return {"code": -4, "data": {}, "msg": "机器人数量限制,目前不能超过15!"}
        # step-2: (skill-component existence check — not implemented here)
        # step-3: persist the config payload; loading it later is the
        # expensive part handled by the training pipeline.
        config = {
            # robot ID
            "robotId": robot_id,
            # version ID
            "version": version,
            # knowledge-base Q&A
            "questions": questions,
            # intent recognition library
            "intents": intents,
            # slot extraction library
            "slots": slots,
        }
        # Insert the task row into the database.
        col = "robot_id,version_id,es_id,es_link,status," \
              "CREATED_BY,CREATED_AT,UPDATED_BY,UPDATED_AT,DELETE_FLAG,CLUSTER"
        t1 = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        doc_ = {
            "robot": robot_id,
            "version": version,
            "json_data": json.dumps(config, ensure_ascii=False),
        }
        # 2020-12-05: switched from Elasticsearch to MongoDB for storage.
        # a_ = insert_data(doc_)
        # if a_ is None:
        #     return {"code": -7, "data": {}, "msg": "插入数据错误!"}
        # es_id = a_["_id"]
        # es_link = f'{es_dict[BRANCH]["url"]}/{INDEX_NAME}/_doc/{es_id}'
        # print(es_link)
        es_link = ""
        try:
            # Store the training payload in MongoDB; its ObjectId goes into
            # the es_id column (name kept from the old ES implementation).
            insert_one_result = collection.insert_one(config)
            es_id = str(insert_one_result.inserted_id)
        except Exception as e:
            print(repr(e)), logging.error(repr(e))
            return {"code": -7, "data": {}, "msg": "插入数据错误!"}
        # Queue the task (status=1) for the interval scheduler to pick up.
        sql = f'insert into {TABLE_NAME}({col}) ' \
              f'VALUES("{robot_id}", "{version}", "{es_id}", "{es_link}", 1, "实在科技", NOW(), ' \
              f'"实在科技", NOW(), 0, "{CLUSTER}")'
        # print(sql), logging.info(sql)
        index = cur.execute(sql)
        conn.commit()
    except Exception as e:
        traceback.print_exc()
        return {"code": -1, "data": {}, "msg": repr(e)}
    finally:
        # NOTE(review): a `return` inside `finally` replaces any in-flight
        # return value/exception — when identifier is falsy this overrides
        # even the earlier -2 return. Confirm this is intended.
        if identifier:
            release_lock(lock_name, identifier)
        else:
            return {
                "code": -2,
                "data": {},
                "msg": f"{robot_id}, {version}并发锁异常!"
            }
        cur.close()
        conn.close()
    return {"code": 0, "msg": "ok!"}