Пример #1
0
def _prune_old_model_versions(cur):
    """Delete local artifacts of all but the two newest trained versions per robot.

    Reads every row with ``status=0`` for this cluster through ``cur`` and, for
    each robot, removes the ``.model`` file and the ``_intent`` /
    ``_similarity`` / ``_similarity_sentbert`` directories of every version
    that is not among the two highest version numbers.

    :param cur: open DB cursor (assumed DB-API ``%s`` paramstyle, e.g. a
        MySQL driver -- TODO confirm against the pool's driver).
    :raises ValueError: if a stored ``version_id`` is not an integer string.
    """
    sql = (f"SELECT robot_id,version_id,status FROM {TABLE_NAME} "
           f"where status=0 and DELETE_FLAG=0 and CLUSTER=%s;")
    cur.execute(sql, (str(CLUSTER),))

    robot_versions = {}
    for row in cur.fetchall():
        robot_versions.setdefault(row[0], []).append(int(row[1]))
    print(robot_versions)
    logging.info(robot_versions)

    for robot, versions in robot_versions.items():
        # Everything after the two highest version numbers is stale.
        for stale_version in sorted(set(versions), reverse=True)[2:]:
            prefix = f"./config_models/robot_{robot}_version_{stale_version}"
            if not os.path.exists(f"{prefix}.model"):
                continue
            msg = f"remove old version, robot: {robot}, version: {stale_version}"
            print(msg)
            logging.info(msg)
            for suffix in ("_intent", "_similarity", "_similarity_sentbert"):
                # Any of these directories may legitimately be absent.
                shutil.rmtree(prefix + suffix, ignore_errors=True)
            try:
                os.remove(f"{prefix}.model")
            except OSError as err:
                logging.warning("could not remove %s.model: %r", prefix, err)


def long_task_train(config):
    """Run the training pipeline for one robot version and track it in the DB.

    Steps, in order: mark the row as training (``status=2``), dump the robot
    config, train the sentence-BERT similarity model in a subprocess command,
    post-process and dump the config again, drop the cached redis entry,
    upload the artifacts over SFTP, mark the row as finished (``status=0``)
    and finally prune old local model versions.  While each background step
    runs, ``TRAINING_TIME`` is refreshed periodically as a heartbeat.

    Any failure (including one raised inside a background step) sets
    ``status=3`` and stores ``"error: <repr>"`` in ``STATUS_MESSAGE``.

    :param config: mapping with at least the keys ``"robotId"`` and
        ``"version"``; it is also handed to ``Config(...)`` unchanged.
    :raises KeyError: if ``config`` lacks ``"robotId"`` or ``"version"``.
    """
    robot_id = config["robotId"]
    version_id = config["version"]

    # Values are bound as parameters instead of being formatted into the SQL
    # text, so quotes inside an error message can no longer break the
    # statement.  Assumes the driver uses the ``%s`` paramstyle (MySQL
    # drivers do) -- TODO confirm for the configured pool.
    row_filter = ("WHERE robot_id=%s and version_id=%s "
                  "and DELETE_FLAG=0 and CLUSTER=%s;")
    row_key = (str(robot_id), str(version_id), str(CLUSTER))

    conn = pool.connection()
    cur = conn.cursor()

    def update_row(assignments, values=(), verbose=True):
        """Run ``UPDATE ... SET <assignments>`` on this job's row and commit."""
        sql = f"UPDATE {TABLE_NAME} SET {assignments} {row_filter}"
        params = tuple(values) + row_key
        if verbose:
            print(sql, params)
            logging.info("%s %s", sql, params)
        affected = cur.execute(sql, params)
        conn.commit()
        if verbose:
            print(affected)
            logging.info(affected)
        return affected

    def heartbeat():
        """Refresh TRAINING_TIME so the row is visibly still being worked on."""
        update_row("TRAINING_TIME=NOW(),UPDATED_AT=NOW()", verbose=False)

    def run_with_heartbeat(target, interval, args=()):
        """Run ``target`` in a thread, sending heartbeats until it finishes.

        An exception raised by ``target`` is re-raised here, in the calling
        thread, so a failed step can no longer be recorded as a success.
        """
        failures = []

        def runner():
            try:
                target(*args)
            except Exception as exc:
                failures.append(exc)

        worker = threading.Thread(target=runner)
        worker.start()
        while worker.is_alive():
            heartbeat()
            # join() returns as soon as the worker ends, unlike a fixed sleep.
            worker.join(interval)
        if failures:
            raise failures[0]

    @time_spend
    def do_robot_config1():
        # Pre-process the robot config so it can be loaded into memory.
        # Deliberately best-effort: errors are printed, not propagated.
        try:
            print("开始第一次载入pickle")
            c = Config(config)
            print("开始第一次dumppickle")
            c.dumps()
        except Exception:
            traceback.print_exc()

    @time_spend
    def do_robot_config2():
        # Second pre-processing pass, run after sentence-BERT training.
        c = Config(config)
        c.process_sentence_bert()
        c.dumps()

    @time_spend
    def upload_to_sftp():
        # The environment is isolated, so artifacts are exchanged over SFTP.
        local_prefix = f'config_models/robot_{robot_id}_version_{version_id}'
        remote_prefix = f"./{ORIGIN_PATH}/config_models/robot_{robot_id}_version_{version_id}"
        sftp = SftpServer(HOST, USER, PASSWORD, PORT)
        try:
            sftp.put_file(f'{local_prefix}.model', f"{remote_prefix}.model")

            # Upload whichever trained model directories exist.
            if os.path.exists(f'{local_prefix}_intent'):
                msg = "上传意图模型"
                print(msg)
                logging.info(msg)
                os.chdir(__PATH__)
                sftp.put_dir(f'{local_prefix}_intent', f"{remote_prefix}_intent")
                msg = "上传模型配置完成, 上传相似度模型"
                print(msg)
                logging.info(msg)
            if os.path.exists(f'{local_prefix}_similarity'):
                os.chdir(__PATH__)
                sftp.put_dir(f'{local_prefix}_similarity', f"{remote_prefix}_similarity")
                msg = "上传相似度模型完成"
                print(msg)
                logging.info(msg)
                os.chdir(__PATH__)
            if os.path.exists(f'{local_prefix}_similarity_sentbert'):
                os.chdir(__PATH__)
                sftp.put_dir(f'{local_prefix}_similarity_sentbert',
                             f"{remote_prefix}_similarity_sentbert")
                msg = "上传senenceBert相似度模型完成"
                print(msg)
                logging.info(msg)
                os.chdir(__PATH__)
        finally:
            # Close the SFTP session even when an upload fails.
            sftp.close()

    try:
        # Step 1: mark the job as "training".
        update_row("status=2,UPDATED_AT=NOW(),START_TIME=NOW()")

        # Step 2: first config dump.  (The intent and plain-similarity
        # training steps are currently disabled.)
        msg = "start dumps robot config ..."
        print(msg)
        logging.info(msg)
        run_with_heartbeat(do_robot_config1, 10)

        msg = "start train similarity ..."
        print(msg)
        logging.info(msg)

        # Step 3: train the sentence-BERT model via an external command.
        msg = "start train sentence bert ..."
        print(msg)
        logging.info(msg)
        command_2 = f'CUDA_VISIBLE_DEVICES=0 {PYTHON_BIN_PATH} -m train_similarity_sentenceBERT -r {robot_id} -v {version_id}'
        print(command_2)
        run_with_heartbeat(do_thread, 20, args=('1', command_2))

        # Step 4: second config dump, including sentence-BERT processing.
        msg = "start dumps robot config ..."
        print(msg)
        logging.info(msg)
        run_with_heartbeat(do_robot_config2, 10)

        # Drop the cached copy of the previous build of this version.
        redis.delete(f"robot_{robot_id}_version_{version_id}_v2")

        # Step 5: upload config and models to the SFTP server.
        msg = "start upload sftp ..."
        print(msg)
        logging.info(msg)
        run_with_heartbeat(upload_to_sftp, 20)

        # Step 6: only now mark the job as finished, so that status=0
        # implies the artifacts are already uploaded.
        update_row("status=0,UPDATED_AT=NOW(),END_TIME=NOW()")
        msg = "训练完成,更新到数据库"
        print(msg)
        logging.info(msg)

        # Step 7: housekeeping.  A failure here must not flip a job that
        # already succeeded to "failed", so it is logged and swallowed.
        try:
            _prune_old_model_versions(cur)
        except Exception as err:
            traceback.print_exc()
            logging.warning("pruning old model versions failed: %r", err)

    except Exception as e:
        # Record the failure: status 3 plus the error text.
        traceback.print_exc()
        msg = "error: {}".format(repr(e))
        print(msg)
        logging.info(msg)
        update_row("status=3,STATUS_MESSAGE=%s,UPDATED_AT=NOW(),END_TIME=NOW()",
                   values=(msg,))
    finally:
        # Always hand the connection back, even if the failure update raised.
        cur.close()
        conn.close()
Пример #2
0
def train_similarity_sentenceBERT(robot_id, version):
    """Fine-tune a sentence-BERT similarity model for one robot version.

    Loads the pickled robot ``Config`` from
    ``config_models/robot_<id>_version_<v>.model``, builds train/dev examples
    with ``prepare_csv_data``, fine-tunes the pretrained
    ``distiluse-base-multilingual-cased-v2`` weights for one epoch with a
    cosine-similarity loss, saves the model to
    ``config_models/robot_<id>_version_<v>_similarity_sentbert`` and writes a
    result JSON into the ``SIMILARITY_RESULT`` and ``INTENT_RESULT`` columns.

    If the pretrained weights directory does not exist, only a message is
    logged and nothing is trained.

    :param robot_id: robot identifier used in file names and the DB filter.
    :param version: version identifier used in file names and the DB filter.
    :returns: ``None``.
    """
    # bert-tiny failed to learn under many hyper-parameter settings and
    # roberta_wwm_ext_3 did learn; the pretrained sentence-BERT checkpoint
    # below is used because it converges faster.
    pretrain_name = "distiluse-base-multilingual-cased-v2"
    train_dir = "train_files"
    # Location of the initial weights.
    pretrain_path = f"pretrained_models/{pretrain_name}"
    path = f"config_models/robot_{robot_id}_version_{version}.model"
    print("model_path")
    print(path)

    if not os.path.exists(pretrain_path):
        msg = f"can not found, robot_id: {robot_id}, version:{version} "
        print(msg)
        logging.info(msg)
        return

    msg = f"start train sentence_bert model, robot_id: {robot_id}, version:{version} "
    print(msg)
    logging.info(msg)

    # NOTE(review): pickle executes arbitrary code on load; this is only safe
    # as long as the .model file is produced by this service itself.
    with open(path, "rb") as model_file:
        c: Config = pickle.load(model_file)

    temp_dir = f"{train_dir}/robot_{robot_id}_version_{version}_sentbert"
    # makedirs also creates train_dir itself when it is missing.
    os.makedirs(temp_dir, exist_ok=True)
    examples_train, examples_dev = prepare_csv_data(c, temp_dir)
    print(pretrain_path)

    print("训练集")
    print(len(examples_train))
    print("测试集")
    print(len(examples_dev))
    if len(examples_train) > 50000:
        examples_train = examples_train[:50000]
    # NOTE(review): threshold (5000) and cut (4000) disagree, so a dev set of
    # 4001-5000 examples is kept whole while a larger one shrinks to 4000.
    # Kept as-is; confirm which number is intended.
    if len(examples_dev) > 5000:
        examples_dev = examples_dev[:4000]

    model = SentenceTransformer(pretrain_path)
    train_dataset = SentencesDataset(examples_train, model)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)
    train_loss = losses.CosineSimilarityLoss(model)
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              epochs=1,
              warmup_steps=100)

    output_dir = f"config_models/robot_{robot_id}_version_{version}_similarity_sentbert"
    model.save(output_dir)
    print("模型保存成功,地址是:")
    print(output_dir)

    # NOTE(review): these scores are hard-coded constants, not measured on
    # examples_dev; whatever reads the result columns sees placeholder values.
    result = {"train": 0.921, "dev": 0.932}
    result_json = json.dumps(result, ensure_ascii=False)

    # Publish the scores.  INTENT_RESULT gets the same value only to stay
    # compatible with the older backend.  Each column is written on its own
    # connection so that one failed write does not prevent the other.
    # Assumes the driver uses the ``%s`` paramstyle -- TODO confirm.
    for column in ("SIMILARITY_RESULT", "INTENT_RESULT"):
        conn = pool.connection()
        cur = conn.cursor()
        try:
            sql_ = (f"UPDATE {TABLE_NAME} SET {column}=%s,UPDATED_AT=NOW() "
                    f"WHERE robot_id=%s and version_id=%s and DELETE_FLAG=0 and CLUSTER=%s;")
            print(sql_)
            cur.execute(sql_, (result_json, str(robot_id), str(version), str(CLUSTER)))
            conn.commit()
        except Exception as e:
            # Best effort: a failed score write must not fail the training.
            print(repr(e))
        finally:
            cur.close()
            conn.close()
    print(result)
Пример #3
0
def interval_sql_train():
    """Periodic scheduler tick: clean up deleted robots and start queued jobs.

    Status codes stored in the table:

    0 - training finished
    1 - queued
    2 - training
    3 - training failed

    The tick does three things:

    1. For every robot listed in the redis key ``delete_robot`` (``;``
       separated), asks the model manager to take it offline and removes its
       local ``./config_models/robot_<id>_version*`` files and directories.
       The redis key is left in place because other nodes may still need it;
       repeating the cleanup is harmless.
    2. If some job has ``status=2``, marks as failed (``status=3``) every
       training job whose ``TRAINING_TIME`` is more than three minutes old.
    3. Otherwise takes the oldest queued job (``status=1``), loads its config
       from MongoDB and submits ``long_task_train`` to the executor.
    """
    import glob
    import shutil

    delete_robot = redis.get("delete_robot")
    if delete_robot:
        for robot in str(delete_robot.decode()).split(";"):
            # Take the robot offline in the model manager (best effort).
            try:
                res = requests.post(f"{MODEL_MANAGER_HOST}/api/delete_robot",
                                    json={"robotId": robot}).json()
                msg = f"{robot} : {res}"
                print(msg)
                logging.info(msg)
            except Exception as e:
                logging.warning("delete_robot request failed for %s: %r", robot, e)

            # Remove the model files without going through a shell, so a
            # crafted robot id cannot inject shell commands.
            pattern = glob.escape(f"./config_models/robot_{robot}_version") + "*"
            for target in glob.glob(pattern):
                if os.path.isdir(target) and not os.path.islink(target):
                    shutil.rmtree(target, ignore_errors=True)
                else:
                    try:
                        os.remove(target)
                    except OSError as e:
                        logging.warning("could not remove %s: %r", target, e)

    cluster = str(CLUSTER)
    conn = pool.connection()
    cur = conn.cursor()
    try:
        # Is any job currently training?
        # Assumes the driver uses the ``%s`` paramstyle -- TODO confirm.
        sql = (f"select robot_id,version_id,TRAINING_TIME from {TABLE_NAME} "
               f"where status=2 and DELETE_FLAG=0 and CLUSTER=%s;")
        cur.execute(sql, (cluster,))
        training = cur.fetchall()
        if training:
            row = training[0]
            msg = "robot:{}, version:{} is in training....".format(row[0], row[1])
            print(msg)
            logging.info(msg)

            # Watchdog: fail jobs whose TRAINING_TIME has not been refreshed
            # for three minutes (e.g. a trainer that hung or died).
            sql = (f"UPDATE {TABLE_NAME} SET status=3,UPDATED_AT=NOW() "
                   f"WHERE TRAINING_TIME<SUBDATE(now(),interval 3 minute) and status=2 "
                   f"and DELETE_FLAG=0 and CLUSTER=%s;")
            print(sql)
            logging.info(sql)
            affected = cur.execute(sql, (cluster,))
            conn.commit()
            print(affected)
            logging.info(affected)
            return

        sql = (f"select robot_id,version_id,es_id,status from {TABLE_NAME} "
               f"where status=1 and DELETE_FLAG=0 and CLUSTER=%s order by CREATED_AT")
        cur.execute(sql, (cluster,))
        queued = cur.fetchall()
        if not queued:
            msg = "no task..."
            print(msg)
            logging.info(msg)
            return

        row = queued[0]
        msg = "robot:{}, version:{} is starting....".format(row[0], row[1])
        print(msg)
        logging.info(msg)
        try:
            # Training data is read from MongoDB; es_id holds its document id.
            mongo_result = collection.find({"_id": ObjectId(row[2])})
            print(row[2])
            config = mongo_result[0]
            # Run the training in the background.
            # NOTE(review): the row stays at status=1 until long_task_train
            # updates it; if the next tick runs before that, the same job
            # may presumably be submitted twice -- confirm.
            executor.submit(long_task_train, config)
        except Exception as e:
            msg = "error: {}".format(repr(e))
            print(msg)
            logging.info(msg)
            traceback.print_exc()
            # The message is bound as a parameter: repr(e) often contains
            # quotes that would break a string-built statement.
            sql = (f"UPDATE {TABLE_NAME} SET status=3,STATUS_MESSAGE=%s,"
                   f"UPDATED_AT=NOW(),END_TIME=NOW() "
                   f"WHERE robot_id=%s and version_id=%s "
                   f"and DELETE_FLAG=0 and CLUSTER=%s;")
            print(sql)
            logging.info(sql)
            cur.execute(sql, (msg, str(row[0]), str(row[1]), cluster))
            conn.commit()
    finally:
        # Always return the connection to the pool, even on errors.
        cur.close()
        conn.close()
Пример #4
0
    def fun():
        """Soft-delete the robot ``robot_id`` and schedule its local cleanup.

        ``robot_id`` comes from the enclosing scope.  Returns a dict with the
        keys ``code``, ``msg`` and ``data``:

        * ``0``   - the robot was flagged as deleted and taken offline;
        * ``-11`` - no live row exists for this robot (cleanup is still queued);
        * ``-12`` - a version of this robot is currently training;
        * ``-1``  - an unexpected error occurred; ``msg`` holds its repr.
        """

        def enqueue_local_cleanup():
            # Append the robot to the ";"-separated redis list of robots
            # whose model files should be removed; the key expires after
            # 60 seconds.
            pending = redis.get("delete_robot")
            if pending:
                value = str(pending.decode()) + ";" + str(robot_id)
            else:
                value = str(robot_id)
            redis.set("delete_robot", value)
            redis.expire("delete_robot", 60)
            print(redis.get("delete_robot"))

        msg = f"机器编号:{robot_id}"
        print(msg)
        logging.info(msg)
        # Values are bound as parameters rather than formatted into the SQL.
        # Assumes the driver uses the ``%s`` paramstyle -- TODO confirm.
        params = (str(robot_id), str(CLUSTER))
        conn = pool.connection()
        cur = conn.cursor()
        try:
            # Step 1: does the robot exist at all?
            cur.execute(
                f"SELECT robot_id,version_id,status FROM {TABLE_NAME} "
                f"where robot_id=%s and CLUSTER=%s and DELETE_FLAG=0;",
                params)
            if not cur.fetchall():
                enqueue_local_cleanup()
                return {"code": -11, "data": {}, "msg": "机器不存在或已删除!"}

            # Step 2: refuse while any of its versions is training.
            cur.execute(
                f"SELECT robot_id,version_id,status FROM {TABLE_NAME} "
                f"where robot_id=%s and status=2 and DELETE_FLAG=0 and CLUSTER=%s;",
                params)
            if cur.fetchall():
                return {"code": -12, "msg": "机器人正在训练中...", "data": {}}

            # Step 3: flag every live row of the robot as deleted and offline.
            sql = (f"UPDATE {TABLE_NAME} SET DELETE_FLAG=1, IS_ONLINE=0, UPDATED_AT=NOW() "
                   f"where robot_id=%s and CLUSTER=%s and DELETE_FLAG=0;")
            print(sql)
            logging.info(sql)
            affected = cur.execute(sql, params)
            conn.commit()
            print(affected)
            logging.info(affected)

            # Step 4: ask the model manager to take the robot offline.
            # Best effort -- a failure is logged and does not abort.
            try:
                res = requests.post(
                    f"{MODEL_MANAGER_HOST}/api/delete_robot",
                    json={"robotId": robot_id}).json()
                msg = f"{robot_id} : {res}"
                print(msg)
                logging.info(msg)
            except Exception as e:
                logging.warning("delete_robot request failed for %s: %r", robot_id, e)

            # Step 5: drop the robot from the cached "robot:version" list.
            online = redis.get("online_robot_versions")
            if online:
                still_online = [
                    entry for entry in str(online.decode()).split(";")
                    if entry.split(":")[0] != str(robot_id)
                ]
                redis.set("online_robot_versions", ";".join(still_online))
                msg = "now online robot_version:" + str(
                    redis.get("online_robot_versions").decode())
                print(msg)
                logging.info(msg)

            enqueue_local_cleanup()
            return {"code": 0, "msg": "机器删除成功!", "data": {}}
        except Exception as e:
            traceback.print_exc()
            msg = repr(e)
            print(msg)
            logging.info(msg)
            return {"code": -1, "data": {}, "msg": msg}
        finally:
            cur.close()
            conn.close()
Пример #5
0
    def fun():
        """Publish version ``version`` of robot ``robot_id`` as the online one.

        ``robot_id`` and ``version`` come from the enclosing scope.  Returns a
        dict with the keys ``code``, ``msg`` and ``data``:

        * ``0``   - published; every other version of the robot is offline;
        * ``-11`` - no live row exists for this robot/version;
        * ``-12`` - the version is queued, training or failed, or the model
          manager request failed;
        * ``-1``  - an unexpected error occurred; ``msg`` holds its repr.
        """
        msg = f"机器编号:{robot_id},版本编号:{version}"
        print(msg)
        logging.info(msg)

        # Statuses that must not be published, with their user-facing message.
        not_publishable = {
            "1": "机器人还在排队中...",
            "2": "机器人正在训练中...",
            "3": "机器人该版本训练失败了_-_",
        }
        rid, ver, cluster = str(robot_id), str(version), str(CLUSTER)

        conn = pool.connection()
        cur = conn.cursor()
        try:
            # Step 1: does the robot/version exist?
            # Assumes the driver uses the ``%s`` paramstyle -- TODO confirm.
            cur.execute(
                f"SELECT robot_id,version_id,status FROM {TABLE_NAME} "
                f"where robot_id=%s and version_id=%s and CLUSTER=%s and DELETE_FLAG=0;",
                (rid, ver, cluster))
            rows = cur.fetchall()
            if not rows:
                return {"code": -11, "data": {}, "msg": "机器不存在或已删除!"}

            # Normalise through str() so the check works whether the driver
            # returns the status as an int or as a string; a bare == "1"
            # never matches an integer column and lets unfinished versions
            # through.
            status = str(rows[0][2])
            if status in not_publishable:
                return {"code": -12, "msg": not_publishable[status], "data": {}}

            # Step 2: make sure the model manager has the model loaded.
            try:
                res_ = requests.post(
                    f"{MODEL_MANAGER_HOST}/api/model_query",
                    json={"robotId": robot_id, "version": version}).json()
                if int(res_["data"]["intent_model"]) > 0:
                    # Model already present; nothing to do.
                    print("intent:", res_["data"]["intent_model"])
                else:
                    # A predict call makes the manager bring the model up.
                    msg = f"push model {robot_id}-{version} online.."
                    print(msg)
                    logging.info(msg)
                    payload_similar = {
                        "modelName": "sentence_bert",
                        "robotId": robot_id,
                        "version": version,
                        "text_as": "好的"
                    }
                    url = f"{MODEL_MANAGER_HOST}/api/model_predict"
                    requests.post(url=url, json=payload_similar).json()
            except Exception as e:
                msg = repr(e)
                print(msg)
                logging.info(msg)
                return {"code": -12, "msg": "机器人该版本发布失败了_-_", "data": {}}

            # Step 3: this version goes online, all others go offline; both
            # statements are committed together.
            sql1 = (f"UPDATE {TABLE_NAME} SET IS_ONLINE=1,UPDATED_AT=NOW() "
                    f"where robot_id=%s and version_id=%s "
                    f"and CLUSTER=%s and DELETE_FLAG=0;")
            sql2 = (f"UPDATE {TABLE_NAME} SET IS_ONLINE=0,UPDATED_AT=NOW() "
                    f"where robot_id=%s and version_id !=%s "
                    f"and CLUSTER=%s and DELETE_FLAG=0;")
            index1 = cur.execute(sql1, (rid, ver, cluster))
            index2 = cur.execute(sql2, (rid, ver, cluster))
            print(sql1 + sql2)
            logging.info(sql1 + sql2)
            conn.commit()
            print(index1, index2)
            logging.info("%s %s", index1, index2)

            # Step 4: cache all online "robot:version" pairs in redis to
            # save repeated DB lookups.
            cur.execute(
                f"SELECT robot_id,version_id FROM {TABLE_NAME} "
                f"where IS_ONLINE=1 and CLUSTER=%s and DELETE_FLAG=0;",
                (cluster,))
            online_rows = cur.fetchall()
            if online_rows:
                # f-string instead of "+" so non-string columns do not raise.
                joined = ";".join(f"{row[0]}:{row[1]}" for row in online_rows)
                print(joined)
                logging.info(joined)
                redis.set("online_robot_versions", joined)
                print(redis.get("online_robot_versions"))
            return {"code": 0, "msg": "机器发布成功!", "data": {}}
        except Exception as e:
            traceback.print_exc()
            msg = repr(e)
            print(msg)
            logging.info(msg)
            return {"code": -1, "data": {}, "msg": msg}
        finally:
            cur.close()
            conn.close()
Пример #6
0
    def fun():
        """Report the training status of robot ``robot_id`` / version ``version``.

        ``robot_id`` and ``version`` come from the enclosing scope.

        Status codes stored in the table:

        0 - training finished
        1 - queued
        2 - training
        3 - training failed

        Returns a dict with ``code``, ``msg`` and ``data``.  On success
        (``code`` 0) ``data`` holds ``status`` (``running`` / ``queue`` /
        ``training`` / ``error``), ``statusMessage``, ``result`` (the dev
        score from SIMILARITY_RESULT rounded to two decimals), ``version``,
        the three timestamps as strings and ``isOnline``.  ``code`` is ``-9``
        when the robot/version does not exist and ``-10`` for an unknown
        status value.
        """
        msg = "【查询模型状态】机器编号:{}, 版本编号:{}".format(robot_id, version)
        print(msg)
        logging.info(msg)

        def as_text(value):
            # Timestamps that are still NULL are reported as "".
            return "" if value is None else str(value)

        cluster = str(CLUSTER)
        conn = pool.connection()
        cur = conn.cursor()
        try:
            # Does the robot/version exist?
            # Assumes the driver uses the ``%s`` paramstyle -- TODO confirm.
            cur.execute(
                f"select robot_id,version_id,status,STATUS_MESSAGE,created_at,start_time,"
                f"end_time,INTENT_RESULT,SIMILARITY_RESULT,IS_ONLINE "
                f"from {TABLE_NAME} "
                f"where robot_id=%s and version_id=%s and DELETE_FLAG=0 and CLUSTER=%s;",
                (str(robot_id), str(version), cluster))
            rows = cur.fetchall()
            if not rows:
                return {"code": -9, "data": {}, "msg": "机器和版本不存在!"}
            row = rows[0]

            # Only the similarity score is reported; the intent score is no
            # longer part of the result.
            raw_similarity = str(row[8]).strip()
            similar_res = {}
            if raw_similarity not in ["", "NULL", "null", "None", "none"]:
                similar_res = json.loads(raw_similarity)
            acc_res = round(float(similar_res.get("dev", 0.0)), 2)

            def ok(status_name, status_message):
                # Shared response shape for every known status.
                return {
                    "code": 0,
                    "msg": "ok!",
                    "data": {
                        "status": status_name,
                        "statusMessage": status_message,
                        "result": acc_res,
                        "version": int(row[1]),
                        "createTime": as_text(row[4]),
                        "startTime": as_text(row[5]),
                        "endTime": as_text(row[6]),
                        "isOnline": bool(row[9])
                    }
                }

            status = str(row[2])
            if status == "0":
                # Only the two newest finished versions are reported as
                # still in operation; older ones are reported as deleted.
                cur.execute(
                    f"select robot_id,version_id,status,STATUS_MESSAGE,created_at,"
                    f"start_time,end_time "
                    f"from {TABLE_NAME} "
                    f"where robot_id=%s and DELETE_FLAG=0 and CLUSTER=%s;",
                    (str(robot_id), cluster))
                versions = sorted(
                    (int(r[1]) for r in cur.fetchall() if str(r[2]) == "0"),
                    reverse=True)
                print(versions)
                logging.info(versions)
                if int(row[1]) in versions[:2]:
                    return ok("running", "正在运作中...")
                return ok("running", "训练成功,但已删除(老版本)...")

            if status == "1":
                # Position in the queue, ordered by creation time.
                cur.execute(
                    f"select robot_id,version_id,status "
                    f"from {TABLE_NAME} "
                    f"where status=1 and DELETE_FLAG=0 and CLUSTER=%s order by CREATED_AT",
                    (cluster,))
                # NOTE(review): the count includes this robot's own row, so
                # the first job in the queue reports 1 "ahead" -- kept as-is;
                # confirm whether count - 1 is intended.
                count = 0
                for queued in cur.fetchall():
                    count += 1
                    if str(queued[0]) == str(robot_id) and str(queued[1]) == str(version):
                        break
                return ok("queue", f"前面还有{count}个机器人在排队中...")

            if status == "2":
                return ok("training", "正在训练中")

            if status == "3":
                return ok("error", str(row[3]))

            return {"code": -10, "data": {}, "msg": "状态码异常,不在范围内!"}
        finally:
            # Return the connection even if a query or JSON parse raised.
            cur.close()
            conn.close()
Пример #7
0
    def fun():
        """Register a new robot/version training task.

        Reads ``robot_id``, ``version``, ``questions``, ``intents`` and
        ``slots`` from the enclosing scope.

        While holding a per robot/version lock, the function:

        1. rejects the request if the robot/version pair already has a
           non-deleted row in ``TABLE_NAME`` for this ``CLUSTER``;
        2. rejects the request if 15 or more distinct robots are already
           registered and ``robot_id`` is not one of them;
        3. stores the training config in MongoDB (``collection``);
        4. inserts a task row with ``status=1`` into ``TABLE_NAME``.

        The lock is released and the cursor/connection are closed on every
        exit path.

        Returns:
            dict: ``{"code": 0, "msg": "ok!"}`` on success. Otherwise a dict
            with ``"data": {}`` and one of these codes: ``-1`` unexpected
            exception (``msg`` is its ``repr``), ``-2`` lock not acquired,
            ``-3`` robot/version already exists, ``-4`` robot limit reached,
            ``-7`` MongoDB insert failed.
        """
        message = f"机器编号:{robot_id}, 版本编号:{version}"
        print(message)
        logging.info(message)

        lock_name = f"lock_name_{robot_id}_{version}"
        # Values are passed as strings because the original statements compared
        # and inserted them as quoted string literals.
        robot_key = str(robot_id)
        version_key = str(version)
        cluster_key = str(CLUSTER)

        conn = pool.connection()
        cur = conn.cursor()
        identifier = False
        try:
            # Wait at most 1s for the lock; give up immediately otherwise.
            try:
                identifier = acquire_lock(lock_name, acquire_time=1, time_out=10)
            except Exception:
                # An error while acquiring is reported like a lock miss (-2).
                traceback.print_exc()
            if not identifier:
                return {
                    "code": -2,
                    "data": {},
                    "msg": f"{robot_id}, {version}并发锁异常!"
                }

            # step 1: the robot/version pair must not exist yet.
            exists_sql = (
                f"SELECT robot_id,version_id,status "
                f"FROM {TABLE_NAME} "
                f"where robot_id=%s and version_id=%s "
                f"and DELETE_FLAG=0 and CLUSTER=%s;"
            )
            cur.execute(exists_sql, (robot_key, version_key, cluster_key))
            if cur.fetchall():
                return {"code": -3, "data": {}, "msg": "机器和版本已存在!"}

            # Collect the versions registered per robot in this cluster.
            list_sql = (
                f"SELECT robot_id,version_id,status "
                f"FROM {TABLE_NAME} "
                f"where DELETE_FLAG=0 and CLUSTER=%s;"
            )
            cur.execute(list_sql, (cluster_key,))
            robot_versions = {}
            for row in cur.fetchall():
                robot_versions.setdefault(row[0], []).append(int(row[1]))

            message = f"当前机器版本:{robot_versions}"
            print(message)
            logging.info(message)
            # New robots are refused once 15 distinct robots exist; an already
            # known robot may still add another version.
            if len(robot_versions) >= 15 and robot_id not in robot_versions:
                return {"code": -4, "data": {}, "msg": "机器人数量限制,目前不能超过15!"}

            # step 2: skill-component existence check -- no code for it here.

            # step 3: persist the training config so it can be loaded later.
            config = {
                # robot id
                "robotId": robot_id,
                # version id
                "version": version,
                # knowledge-base Q&A
                "questions": questions,
                # intent-recognition data
                "intents": intents,
                # slot-extraction data
                "slots": slots,
            }

            # Since 2020-12-05 the config is stored in MongoDB rather than
            # Elasticsearch; the es_id column now holds the MongoDB document id
            # and es_link is left empty.
            es_link = ""
            try:
                insert_one_result = collection.insert_one(config)
                es_id = str(insert_one_result.inserted_id)
            except Exception as e:
                print(repr(e))
                logging.error(repr(e))
                return {"code": -7, "data": {}, "msg": "插入数据错误!"}

            # Insert the task row with status=1.
            col = "robot_id,version_id,es_id,es_link,status," \
                  "CREATED_BY,CREATED_AT,UPDATED_BY,UPDATED_AT,DELETE_FLAG,CLUSTER"
            insert_sql = (
                f"insert into {TABLE_NAME}({col}) "
                f"VALUES(%s, %s, %s, %s, 1, %s, NOW(), %s, NOW(), 0, %s)"
            )
            cur.execute(
                insert_sql,
                (robot_key, version_key, es_id, es_link,
                 "实在科技", "实在科技", cluster_key),
            )
            conn.commit()
        except Exception as e:
            traceback.print_exc()
            return {"code": -1, "data": {}, "msg": repr(e)}
        finally:
            # No `return` in here: it would discard the result computed above
            # and skip the cleanup below.
            try:
                if identifier:
                    release_lock(lock_name, identifier)
            finally:
                try:
                    cur.close()
                finally:
                    conn.close()
        return {"code": 0, "msg": "ok!"}