Python DataFrame.select示例

编程语言: Python

命名空间/包名称: teradataml.dataframe.dataframe

类/类型: DataFrame

方法/功能: select

hotexamples.com的示例: 2

Python DataFrame.select - 已找到2个示例。以下是从开源项目中提取的、评价最高的teradataml.dataframe.dataframe.DataFrame.select真实Python示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

DataFrame(8)

to_pandas(6)

select(2)

drop(1)

to_sql(1)

示例#1

显示文件

def train(data_conf, model_conf, **kwargs):
    """Train the diabetes classifier on a Teradata table and export its artefacts.

    Connects to Teradata using the ``AOA_CONN_HOST``, ``AOA_CONN_USERNAME`` and
    ``AOA_CONN_PASSWORD`` environment variables, loads the feature and target
    columns of ``data_conf["table"]`` into pandas, fits a
    ``MinMaxScaler`` + ``XGBClassifier`` pipeline and writes the results to
    ``artifacts/output/model.joblib`` and ``artifacts/output/model.pmml``.
    A feature-importance plot is saved and training statistics are recorded
    through ``stats.record_stats``.

    Args:
        data_conf: Mapping with a ``"table"`` entry naming the training table
            and an optional ``"schema"`` entry; a missing or empty schema
            results in ``database=None`` being passed to ``create_context``.
        model_conf: Mapping whose ``"hyperParameters"`` entry provides
            ``"eta"`` and ``"max_depth"`` for the classifier.
        **kwargs: Accepted but not used by this function.

    Raises:
        KeyError: If a required environment variable or configuration key
            is missing.
    """
    hyperparams = model_conf["hyperParameters"]

    # A missing or empty "schema" entry means no explicit database is passed.
    schema = data_conf.get("schema")
    create_context(host=os.environ["AOA_CONN_HOST"],
                   username=os.environ["AOA_CONN_USERNAME"],
                   password=os.environ["AOA_CONN_PASSWORD"],
                   database=None if schema == "" else schema)

    feature_names = ["NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns", "BMI", "DiPedFunc", "Age"]
    target_name = "HasDiabetes"

    # read training dataset from Teradata and convert to pandas
    train_df = DataFrame(data_conf["table"])
    train_df = train_df.select([feature_names + [target_name]])
    train_pdf = train_df.to_pandas()

    # split data into X and y
    # Use the `columns` keyword: passing the axis positionally
    # (`drop(target_name, 1)`) is rejected by pandas 2.0 and later.
    X_train = train_pdf.drop(columns=target_name)
    y_train = train_pdf[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb', XGBClassifier(eta=hyperparams["eta"],
                                            max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names, but store them on the pipeline as well
    # for easy access later
    model.feature_names = feature_names
    model.target_name = target_name

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model, col_names=feature_names, target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")

    from xgboost import plot_importance
    booster = model["xgb"].get_booster()
    # Label the booster's features by column name before plotting and scoring.
    booster.feature_names = feature_names
    plot_importance(booster, max_num_features=10)
    save_plot("feature_importance.png")

    feature_importance = booster.get_score(importance_type="weight")
    # Statistics are recorded against the Teradata DataFrame, not the pandas copy.
    stats.record_stats(train_df,
                       features=feature_names,
                       predictors=[target_name],
                       categorical=[target_name],
                       importance=feature_importance,
                       category_labels={target_name: {0: "false", 1: "true"}})

示例#2

显示文件

文件： training.py 项目： moen-chishti/AoaDemoModels

def train(data_conf, model_conf, **kwargs):
    """Train the diabetes classifier on a Teradata table and export its artefacts.

    Connects to Teradata using the ``AOA_CONN_HOST``, ``AOA_CONN_USERNAME`` and
    ``AOA_CONN_PASSWORD`` environment variables, loads the feature and target
    columns of ``data_conf["table"]`` into pandas, fits a
    ``MinMaxScaler`` + ``XGBClassifier`` pipeline and writes the results to
    ``artifacts/output/model.joblib`` and ``artifacts/output/model.pmml``.

    Args:
        data_conf: Mapping with a ``"table"`` entry naming the training table.
        model_conf: Mapping whose ``"hyperParameters"`` entry provides
            ``"eta"`` and ``"max_depth"`` for the classifier.
        **kwargs: Accepted but not used by this function.

    Raises:
        KeyError: If a required environment variable or configuration key
            is missing.
    """
    hyperparams = model_conf["hyperParameters"]

    create_context(host=os.environ["AOA_CONN_HOST"],
                   username=os.environ["AOA_CONN_USERNAME"],
                   password=os.environ["AOA_CONN_PASSWORD"])

    feature_names = [
        "NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns",
        "BMI", "DiPedFunc", "Age"
    ]
    target_name = "HasDiabetes"

    # read training dataset from Teradata and convert to pandas
    train_df = DataFrame(data_conf["table"])
    train_df = train_df.select([feature_names + [target_name]])
    train_pdf = train_df.to_pandas()

    # split data into X and y
    # Use the `columns` keyword: passing the axis positionally
    # (`drop(target_name, 1)`) is rejected by pandas 2.0 and later.
    X_train = train_pdf.drop(columns=target_name)
    y_train = train_pdf[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb',
                       XGBClassifier(eta=hyperparams["eta"],
                                     max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names, but store them on the pipeline as well
    # for easy access later
    model.feature_names = feature_names
    model.target_name = target_name

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model,
                    col_names=feature_names,
                    target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")