Example #1
    def test_get_pai_tf_cmd(self):
        conf = get_cluster_config({})
        os.environ[
            "SQLFLOW_OSS_CHECKPOINT_DIR"] = '''{"Arn":"arn", "Host":"host"}'''
        cmd = submitter.get_pai_tf_cmd(
            conf, "job.tar.gz", "params.txt", "entry.py", "my_dnn_model",
            "user1/my_dnn_model", "test_project.input_table",
            "test_project.val_table", "test_project.res_table", "test_project",
            "/tmp")
        expected = (
            "pai -name tensorflow1150 -project algo_public_dev -DmaxHungTimeBeforeGCInSeconds=0 "
            "-DjobName=sqlflow_my_dnn_model -Dtags=dnn -Dscript=job.tar.gz -DentryFile=entry.py "
            "-Dtables=odps://test_project/tables/input_table,odps://test_project/tables/val_table "
            "-Doutputs=odps://test_project/tables/res_table -DhyperParameters=\"params.txt\" "
            "-DcheckpointDir='oss://sqlflow-models/user1/my_dnn_model/?role_arn=arn/pai2oss_test_project&host=host' "
            "-DgpuRequired='0'")
        self.assertEqual(expected, cmd)

        conf = get_cluster_config({"train.num_workers": 5})
        cmd = submitter.get_pai_tf_cmd(
            conf, "job.tar.gz", "params.txt", "entry.py", "my_dnn_model",
            "user1/my_dnn_model", "test_project.input_table",
            "test_project.val_table", "test_project.res_table", "test_project",
            "/tmp")
        expected = (
            "pai -name tensorflow1150 -project algo_public_dev -DmaxHungTimeBeforeGCInSeconds=0 "
            "-DjobName=sqlflow_my_dnn_model -Dtags=dnn -Dscript=job.tar.gz -DentryFile=entry.py "
            "-Dtables=odps://test_project/tables/input_table,odps://test_project/tables/val_table "
            "-Doutputs=odps://test_project/tables/res_table -DhyperParameters=\"params.txt\" "
            "-DcheckpointDir='oss://sqlflow-models/user1/my_dnn_model/?role_arn=arn/pai2oss_test_project&host=host' "
            r'''-Dcluster="{\"ps\": {\"count\": 1, \"cpu\": 200, \"gpu\": 0}, \"worker\": {\"count\": 5, \"cpu\": 400, \"gpu\": 0}}"'''
        )
        self.assertEqual(expected, cmd)
        del os.environ["SQLFLOW_OSS_CHECKPOINT_DIR"]
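The test above sets SQLFLOW_OSS_CHECKPOINT_DIR by hand and deletes it at the end. A sketch of the same setup using the standard library's unittest.mock.patch.dict, which restores the environment automatically; the arguments are copied from the test above, and submitter / get_cluster_config are the same objects the test imports (this snippet is not part of the original test suite):

import os
from unittest import mock

# patch.dict restores os.environ when the block exits, so the explicit
# `del os.environ["SQLFLOW_OSS_CHECKPOINT_DIR"]` cleanup above is not needed.
with mock.patch.dict(
        os.environ,
        {"SQLFLOW_OSS_CHECKPOINT_DIR": '{"Arn":"arn", "Host":"host"}'}):
    cmd = submitter.get_pai_tf_cmd(
        get_cluster_config({}), "job.tar.gz", "params.txt", "entry.py",
        "my_dnn_model", "user1/my_dnn_model", "test_project.input_table",
        "test_project.val_table", "test_project.res_table", "test_project",
        "/tmp")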
Example #2
def get_pai_explain_cmd(datasource, project, oss_model_path, model_name,
                        data_table, result_table, model_type, model_params,
                        job_file, params_file, label_column, cwd):
    """Get command to submit explain task to PAI

    Args:
        datasource: current datasource
        project: current project
        oss_model_path: the place to load model
        model_name: model used to do prediction
        data_table: data table from which to load explain data
        result_table: table to store prediction result
        model_type: type of the model, see also get_oss_saved_model_type
        model_params: parameters specified by WITH clause
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        label_column: name of the label
        cwd: current working dir

    Returns:
        The command to submit a PAI explain task
    """
    if model_type == EstimatorType.PAIML:
        cmd = get_explain_random_forests_cmd(datasource, model_name,
                                             data_table, result_table,
                                             label_column)
    else:
        conf = cluster_conf.get_cluster_config(model_params)
        cmd = get_pai_tf_cmd(conf,
                             "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + os.path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, oss_model_path,
                             data_table, "", result_table, project)
    return cmd
Example #3
def submit_pai_explain(datasource, select, result_table, model_name,
                       model_attrs):
    """This function pack need params and resource to a tarball
    and submit a explain task to PAI

    Args:
        datasource: current datasource
        select: sql statement to get explain data set
        result_table: the table name to save result
        model_name: model used to do prediction
        model_attrs: dict, params for training, corresponding to WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    # TODO(typhoonzero): Do **NOT** create tmp table when the select statement
    # is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the codegen form a
    # submitting argument of format "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    params["oss_model_path"] = oss_model_path

    label_column = model_attrs.get("label_col")
    params["label_column"] = label_column
    create_explain_result_table(datasource, data_table, result_table,
                                model_type, estimator, label_column)

    conf = cluster_conf.get_cluster_config(model_attrs)

    if model_type == EstimatorType.PAIML:
        cmd = get_explain_random_forests_cmd(datasource, model_name,
                                             data_table, result_table,
                                             label_column)
    else:
        if model_type == EstimatorType.XGBOOST:
            params["entry_type"] = "explain_xgb"
        else:
            params["entry_type"] = "explain_tf"
        prepare_archive(cwd, conf, project, estimator, model_name, data_table,
                        "", oss_model_path, params)
        cmd = get_pai_tf_cmd(conf,
                             "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, oss_model_path,
                             data_table, "", result_table, project)
    submit_pai_task(cmd, datasource)
    drop_tables([data_table], datasource)
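The comment above about formatting the result table name as "db.table" refers to the "odps://project/tables/table_name" arguments visible in Example #1's expected command. A hypothetical helper illustrating that mapping (not taken from SQLFlow's code):

def to_odps_table_uri(full_table_name):
    """Map "project.table" to "odps://project/tables/table", the format seen
    in the -Dtables/-Doutputs flags of Example #1 (illustrative only)."""
    project, _, table = full_table_name.partition(".")
    return "odps://%s/tables/%s" % (project, table)

# to_odps_table_uri("test_project.input_table")
# -> "odps://test_project/tables/input_table"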
Example #4
def submit_pai_evaluate(datasource,
                        model_name,
                        select,
                        result_table,
                        model_attrs,
                        user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: current datasource
        model_name: model used to do evaluation
        select: sql statement to get evaluate data set
        result_table: the table name to save result
        model_attrs: dict, params for training, corresponding to WITH clause
    """

    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model_name,
                                                       user=user)
    params["oss_model_path"] = oss_model_path

    model_type, estimator = pai_model.get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = table_ops.create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_attrs)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_attrs)

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)
    cmd = get_pai_tf_cmd(conf, "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                         "file://" + os.path.join(cwd, PARAMS_FILE),
                         ENTRY_FILE, model_name, oss_model_path, data_table,
                         "", result_table, project)
    submit_pai_task(cmd, datasource)
    table_ops.drop_tables([data_table], datasource)
Example #5
    def test_get_cluster_config(self):
        attrs = {
            "train.worker_cpu": 100,
            "train.worker_gpu": 0,
            "train.ps_cpu": 100,
        }
        conf = get_cluster_config(attrs)
        self.assertEqual(100, conf["worker"]["cpu"])
        self.assertEqual(0, conf["worker"]["gpu"])
        self.assertEqual(100, conf["ps"]["cpu"])

        attrs["train.worker_cpu"] = 100.0
        with self.assertRaises(SQLFlowDiagnostic) as ctx:
            get_cluster_config(attrs)
        self.assertEqual("value for cluster config should be int",
                         ctx.exception.args[0])

        attrs["train.worker_cpu"] = 100
        attrs["train.num_evaluator"] = 2
        with self.assertRaises(SQLFlowDiagnostic) as ctx:
            get_cluster_config(attrs)
        self.assertEqual("train.num_evaluator should only be 1 or 0",
                         ctx.exception.args[0])
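A minimal sketch of get_cluster_config behavior that is consistent with the assertions above and with the default -Dcluster values in Example #1 (one ps with cpu 200, one worker with cpu 400). The key handling, defaults, and error wording of the real SQLFlow implementation may differ; this is only an illustration:

import copy

class SQLFlowDiagnostic(Exception):
    """Stand-in for SQLFlow's diagnostic error type (assumed for illustration)."""

# Defaults inferred from the -Dcluster string asserted in Example #1.
_DEFAULT_CONF = {
    "ps": {"count": 1, "cpu": 200, "gpu": 0},
    "worker": {"count": 1, "cpu": 400, "gpu": 0},
}

def get_cluster_config_sketch(attrs):
    conf = copy.deepcopy(_DEFAULT_CONF)
    for key, value in attrs.items():
        if not key.startswith("train."):
            continue
        if not isinstance(value, int) or isinstance(value, bool):
            raise SQLFlowDiagnostic("value for cluster config should be int")
        name = key[len("train."):]
        if name == "num_workers":
            conf["worker"]["count"] = value
        elif name == "num_evaluator" and value not in (0, 1):
            raise SQLFlowDiagnostic(
                "train.num_evaluator should only be 1 or 0")
        elif "_" in name:
            role, _, resource = name.partition("_")  # e.g. "worker_cpu"
            if role in conf and resource in conf[role]:
                conf[role][resource] = value
    return conf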
Example #6
def submit_pai_predict(datasource, select, result_table, label_column,
                       model_name, model_attrs):
    """This function pack needed params and resource to a tarball
    and submit a prediction task to PAI

    Args:
        datasource: current datasource
        select: sql statement to get prediction data set
        result_table: the table name to save result
        label_column: name of the label column, if it does not exist in select
        model_name: model used to do prediction
        model_attrs: dict, params for training, corresponding to WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    # TODO(typhoonzero): Do **NOT** create tmp table when the select statement
    # is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the codegen form a
    # submitting argument of format "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    setup_predict_entry(params, model_type)

    # (TODO:lhw) get train label column from model meta
    create_predict_result_table(datasource, data_table, result_table,
                                label_column, None, model_type)

    conf = cluster_conf.get_cluster_config(model_attrs)
    prepare_archive(cwd, conf, project, estimator, model_name, data_table, "",
                    oss_model_path, params)

    cmd = get_pai_predict_cmd(conf, datasource, project, oss_model_path,
                              model_name, data_table, result_table, model_type,
                              cwd)
    submit_pai_task(cmd, datasource)
    drop_tables([data_table], datasource)
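The TODO in this example (and in Example #3) suggests skipping the temporary table when the statement is already a bare "SELECT fields,... FROM table". A rough, regex-based sketch of such a check; a robust version would need a real SQL parser, and this is not how SQLFlow implements it:

import re

# Matches only plain "SELECT <columns> FROM <table>" statements with no
# WHERE/JOIN/GROUP BY clauses, per the TODO above. Illustrative only.
_SIMPLE_SELECT = re.compile(
    r"^\s*SELECT\s+[\w\s,.*]+\s+FROM\s+[\w.]+\s*;?\s*$", re.IGNORECASE)

def is_simple_select(select):
    return _SIMPLE_SELECT.match(select) is not None

# is_simple_select("SELECT a, b FROM db.t")        -> True
# is_simple_select("SELECT a FROM db.t WHERE a>1") -> False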
Example #7
def get_pai_predict_cmd(datasource, project, oss_model_path, model_name,
                        predict_table, result_table, model_type, model_params,
                        job_file, params_file, cwd):
    """Get predict command for PAI task

    Args:
        datasource: current datasource
        project: current project
        oss_model_path: the place to load model
        model_name: model used to do prediction
        predict_table: where to store the tmp prediction data set
        result_table: prediction result
        model_type: type of the model, see also get_oss_saved_model_type
        model_params: parameters specified by WITH clause
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        cwd: current working dir

    Returns:
        The command to submit PAI prediction task
    """
    # NOTE(typhoonzero): for PAI machine learning toolkit prediction, we cannot
    # load the TrainStmt since model saving is fully handled by PAI.
    # We directly use the columns in the SELECT statement for prediction; the
    # PAI job will report an error if the columns do not match.
    conf = cluster_conf.get_cluster_config(model_params)
    conn = db.connect_with_data_source(datasource)
    if model_type == EstimatorType.PAIML:
        schema = db.get_table_schema(conn, predict_table)
        result_fields = [col[0] for col in schema]
        return ('''pai -name prediction -DmodelName="%s"  '''
                '''-DinputTableName="%s"  -DoutputTableName="%s"  '''
                '''-DfeatureColNames="%s"  -DappendColNames="%s"''') % (
                    model_name, predict_table, result_table,
                    ",".join(result_fields), ",".join(result_fields))
    else:
        schema = db.get_table_schema(conn, result_table)
        result_fields = [col[0] for col in schema]
        # For TensorFlow and XGBoost, we build a pai-tf cmd to submit the task
        return get_pai_tf_cmd(conf, job_file, params_file, ENTRY_FILE,
                              model_name, oss_model_path, predict_table, "",
                              result_table, project)
Example #8
def get_pai_train_cmd(datasource, estimator_string, model_name, train_table,
                      val_table, model_params, train_params, path_to_save,
                      job_file, params_file, cwd):
    """Get train model comman for PAI

    Args:
        datasource: current datasource
        estimator_string: estimator name, Keras class name, or XGBoost
        model_name: the model name to train
        train_table: data table from which to load train data
        val_table: data table from which to load evaluate data
        model_params: params for training, corresponding to WITH clause
        train_params: params for the training process
        path_to_save: path to save the model
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        cwd: current working dir

    Returns:
        The command to submit a PAI train task
    """
    project = table_ops.get_project(datasource)
    conf = cluster_conf.get_cluster_config(model_params)
    if estimator_string.lower() == "randomforests":
        cmd = get_train_random_forest_pai_cmd(
            model_name, train_table, model_params,
            train_params["feature_column_names"],
            train_params["label_meta"]["feature_name"])
    elif estimator_string.lower() == "kmeans":
        cmd = get_train_kmeans_pai_cmd(datasource, model_name, train_table,
                                       model_params,
                                       train_params["feature_column_names"])
    else:
        cmd = get_pai_tf_cmd(conf, job_file, params_file, ENTRY_FILE,
                             model_name, path_to_save, train_table, val_table,
                             "", project)
    return cmd
Example #9
def submit_pai_train(datasource, estimator_string, select, validation_select,
                     model_params, model_name, pre_trained_model,
                     **train_params):
    """This function submit PAI-TF train task to PAI platform

    Args:
        datasource: string
            Like: odps://access_id:[email protected]/api?
                         curr_project=test_ci&scheme=http
        estimator_string: string
            TensorFlow estimator name, Keras class name, or XGBoost
        select: string
            The SQL statement for selecting data for train
        validation_select: string
            The SQL statement for selecting data for validation
        model_params: dict
            Params for training, corresponding to WITH clause
        model_name: string
            The name under which to save the trained model
        pre_trained_model: string
            The pre-trained model name to load
        train_params: dict
            Extra train params, they will be passed to runtime.tensorflow.train
    """

    # prepare params for tensorflow train,
    # the params will be pickled into train_params.pkl
    params = dict(locals())
    del params["train_params"]
    params.update(train_params)

    if estimator_string.lower().startswith("xgboost"):
        params["entry_type"] = "train_xgb"
    else:
        params["entry_type"] = "train_tf"

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    conf = cluster_conf.get_cluster_config(model_params)

    train_table, val_table = create_train_and_eval_tmp_table(
        select, validation_select, datasource)
    params["pai_table"], params["pai_val_table"] = train_table, val_table

    # clean target dir
    path_to_save = get_oss_model_save_path(datasource, model_name)
    path_to_load = get_oss_model_save_path(datasource, pre_trained_model)
    project = get_project(datasource)
    params["oss_model_dir"] = path_to_save

    if path_to_load == "" or path_to_load != path_to_save:
        clean_oss_model_path(path_to_save + "/")

    # zip all required resource to a tarball
    prepare_archive(cwd, conf, project, estimator_string, model_name,
                    train_table, val_table, path_to_save, params)

    # submit pai task to execute the training
    if estimator_string.lower() == "randomforests":
        cmd = get_train_random_forest_pai_cmd(
            model_name, train_table, model_params,
            train_params["feature_column_names"],
            train_params["label_meta"]["feature_name"])
    elif estimator_string.lower() == "kmeans":
        cmd = get_train_kmeans_pai_cmd(datasource, model_name, train_table,
                                       model_params,
                                       train_params["feature_column_names"])
    else:
        cmd = get_pai_tf_cmd(conf,
                             "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, path_to_save, train_table,
                             val_table, "", project)
    submit_pai_task(cmd, datasource)

    # save trained model to sqlfs
    save_model_to_sqlfs(datasource, path_to_save, model_name)
    drop_tables([train_table, val_table], datasource)
Example #10
def submit_pai_evaluate(datasource,
                        original_sql,
                        select,
                        label_name,
                        model,
                        model_params,
                        result_table,
                        user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: string
            Like: maxcompute://ak:[email protected]/api?
                  curr_project=test_ci&scheme=http
        original_sql: string
            Original "TO EVALUATE" statement.
        select: string
            SQL statement to get the evaluation data set.
        model: string
            Model to load and do prediction.
        label_name: string
            The label name to evaluate.
        model_params: dict
            Params for training, corresponding to WITH clause.
        result_table: string
            The table name to save the evaluation result.
        user: string
            A string to identify the user, used to load model from the user's
            directory.
    """

    params = dict(locals())
    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    params["result_table"] = result_table

    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model,
                                                       user=user)

    model_type, estimator = pai_model.get_saved_model_type_and_estimator(
        datasource, model)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
        validation_metrics = model_params.get("validation.metrics",
                                              "accuracy_score")
    else:
        params["entry_type"] = "evaluate_tf"
        validation_metrics = model_params.get("validation.metrics", "Accuracy")

    validation_metrics = [m.strip() for m in validation_metrics.split(",")]
    with db.connect_with_data_source(datasource) as conn:
        result_column_names = create_evaluate_table(conn, result_table,
                                                    validation_metrics)

    with table_ops.create_tmp_tables_guard(select, datasource) as data_table:
        params["pai_table"] = data_table
        params["result_column_names"] = result_column_names

        if try_pai_local_run(params, oss_model_path):
            return

        conf = cluster_conf.get_cluster_config(model_params)
        with temp_file.TemporaryDirectory(prefix="sqlflow", dir="/tmp") as cwd:
            prepare_archive(cwd, estimator, oss_model_path, params)
            cmd = get_pai_tf_cmd(
                conf, "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                "file://" + os.path.join(cwd, PARAMS_FILE), ENTRY_FILE, model,
                oss_model_path, data_table, "", result_table, project)
            submit_pai_task(cmd, datasource)
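Unlike Example #11 below, this version wraps the temporary table and working directory in context-manager guards so cleanup happens even when submission fails. A standard-library sketch of the directory guard (SQLFlow's temp_file module is assumed to behave similarly):

import tempfile

# The directory is removed automatically when the block exits, unlike the
# tempfile.mkdtemp(...) call in Example #11, which has no corresponding
# cleanup in the code shown.
with tempfile.TemporaryDirectory(prefix="sqlflow", dir="/tmp") as cwd:
    pass  # prepare_archive(cwd, ...); build and submit the PAI command here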
Example #11
def submit_pai_evaluate(datasource,
                        original_sql,
                        select,
                        model_name,
                        model_params,
                        result_table,
                        user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: string
            Like: maxcompute://ak:[email protected]/api?
                  curr_project=test_ci&scheme=http
        original_sql: string
            Original "TO EVALUATE" statement.
        select: string
            SQL statement to get the evaluation data set.
        model_name: string
            Model to load and do prediction.
        model_params: dict
            Params for training, corresponding to WITH clause.
        result_table: string
            The table name to save the evaluation result.
        user: string
            A string to identify the user, used to load model from the user's
            directory.
    """

    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    params["result_table"] = result_table

    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model_name,
                                                       user=user)
    params["oss_model_path"] = oss_model_path

    model_type, estimator = pai_model.get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = table_ops.create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_params)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_params)

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)
    cmd = get_pai_tf_cmd(conf, "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                         "file://" + os.path.join(cwd, PARAMS_FILE),
                         ENTRY_FILE, model_name, oss_model_path, data_table,
                         "", result_table, project)
    submit_pai_task(cmd, datasource)
    table_ops.drop_tables([data_table], datasource)