from pyspark.ml import Pipeline
from pyspark.ml.feature import (Imputer, MinMaxScaler, OneHotEncoderEstimator,
                                StringIndexer, VectorAssembler)


def get_ml1_pipeline():
    stages = []

    # Impute missing values in the numerical columns in place.
    imputer = Imputer(inputCols=ML1_NUMERICAL_COLUMNS, outputCols=ML1_NUMERICAL_COLUMNS)
    stages.append(imputer)

    # Index each categorical column; the indices feed the one-hot encoder below.
    ohe_input_cols = []
    ohe_output_cols = []
    for categorical_column in ML1_CATEGORICAL_COLUMNS:
        str_indexer = StringIndexer(inputCol=categorical_column,
                                    outputCol=categorical_column + "_index",
                                    handleInvalid="keep")
        ohe_input_cols.append(str_indexer.getOutputCol())
        ohe_output_cols.append(categorical_column + "_class_vec")
        stages.append(str_indexer)

    # OneHotEncoderEstimator is the Spark 2.3/2.4 name; it was renamed OneHotEncoder in Spark 3.0.
    encoder = OneHotEncoderEstimator(inputCols=ohe_input_cols, outputCols=ohe_output_cols,
                                     handleInvalid="error", dropLast=False)
    stages.append(encoder)

    # Assemble the imputed numerical columns into one vector and scale it to [0, 1].
    numerical_vector_assembler = VectorAssembler(inputCols=ML1_NUMERICAL_COLUMNS,
                                                 outputCol="numerical_cols_vec",
                                                 handleInvalid="keep")
    scaler = MinMaxScaler(inputCol="numerical_cols_vec", outputCol="scaled_numerical_cols")
    stages.append(numerical_vector_assembler)
    stages.append(scaler)

    # Index the target column "result" into the numeric "label" column expected by Spark ML.
    label_str_indexer = StringIndexer(inputCol="result", outputCol="label", handleInvalid="keep")
    stages.append(label_str_indexer)

    # Combine the one-hot encoded categoricals with the scaled numerical vector.
    assembler_input = encoder.getOutputCols() + [scaler.getOutputCol()]
    assembler = VectorAssembler(inputCols=assembler_input, outputCol="features", handleInvalid="skip")
    stages.append(assembler)

    pipeline = Pipeline(stages=stages)
    return pipeline
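

# Usage sketch (an assumption, not part of the original snippet): `train_df` is a
# hypothetical DataFrame containing the ML1_NUMERICAL_COLUMNS, the
# ML1_CATEGORICAL_COLUMNS, and a string "result" column.
pipeline = get_ml1_pipeline()
model = pipeline.fit(train_df)           # fits imputer, indexers, encoder, and scaler
featurized = model.transform(train_df)   # adds "features" and "label" (plus intermediate columns)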
Example #2
from pyspark.ml import Pipeline
from pyspark.ml.classification import GBTClassifier
from pyspark.ml.feature import MinMaxScaler, OneHotEncoder, StringIndexer, VectorAssembler


def build_pipeline(pipeconfig: dict) -> Pipeline:
    '''
    Build a Pipeline instance based on a config dictionary
    :param pipeconfig: metadata dictionary
    :return: pyspark.ml.Pipeline
    '''

    # Pipeline metadata
    cats = pipeconfig['variables']['categoricals']
    nums = pipeconfig['variables']['numericals']
    index_names = pipeconfig['metadata']['index_names']
    encoded_names = pipeconfig['metadata']['encoded_names']
    vect_name = pipeconfig['metadata']['vect_name']
    feats_name = pipeconfig['metadata']['feats_name']
    labelcol = pipeconfig['model']['labelCol']
    maxdepth = pipeconfig['model']['maxDepth']
    maxbins = pipeconfig['model']['maxBins']
    maxiter = pipeconfig['model']['maxIter']
    seed = pipeconfig['model']['seed']

    # Build stages
    # Index all categorical columns at once (multi-column StringIndexer, Spark 3.0+).
    stageone = StringIndexer(inputCols=cats,
                             outputCols=index_names)

    stagetwo = OneHotEncoder(dropLast=False,
                             inputCols=stageone.getOutputCols(),
                             outputCols=encoded_names)

    # Assemble numericals plus encoded categoricals into one vector, then scale it.
    stagethree = VectorAssembler(inputCols=nums + stagetwo.getOutputCols(),
                                 outputCol=vect_name)

    stagefour = MinMaxScaler(inputCol=stagethree.getOutputCol(),
                             outputCol=feats_name)

    # Gradient-boosted trees classifier, configured from the "model" section.
    stagefive = GBTClassifier(featuresCol=stagefour.getOutputCol(),
                              labelCol=labelcol,
                              maxDepth=maxdepth,
                              maxBins=maxbins,
                              maxIter=maxiter,
                              seed=seed)
    pipeline = Pipeline(stages=[stageone, stagetwo, stagethree, stagefour, stagefive])

    return pipeline
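

# A hypothetical pipeconfig sketch showing the shape build_pipeline expects.
# The key names come from the lookups above; every value here is made up.
example_config = {
    "variables": {
        "categoricals": ["gender", "country"],
        "numericals": ["age", "income"],
    },
    "metadata": {
        "index_names": ["gender_idx", "country_idx"],
        "encoded_names": ["gender_ohe", "country_ohe"],
        "vect_name": "assembled_vec",
        "feats_name": "features",
    },
    "model": {
        "labelCol": "label",
        "maxDepth": 5,
        "maxBins": 32,
        "maxIter": 20,
        "seed": 42,
    },
}
pipeline = build_pipeline(example_config)

Example #3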
# Random forest hyperparameters used in the model cell below
numTrees = 25
maxDepth = 5
maxBins = 5

# COMMAND ----------

# MAGIC %md
# MAGIC #### 4.2 Initialize the Model
# MAGIC
# MAGIC In this step, we only configure the algorithm and chain it to the feature engineering steps. We do not train the model yet.

# COMMAND ----------

rf = RandomForestClassifier(
    labelCol="conversion",              # Label we are trying to predict
    featuresCol=scaler.getOutputCol(),  # Feature vector from the last feature engineering stage
    numTrees=numTrees,                  # Hyperparameters defined above
    maxDepth=maxDepth,
    maxBins=maxBins)

pipeline = Pipeline(stages=[
    discretizer,  # Feature engineering steps
    index_pipeline,
    encoder,
    vec_assembler,
    scaler,
    rf  # Initialized model
])

# COMMAND ----------
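
# A hedged sketch of the eventual training step (not part of this cell; the
# notebook defers training to a later section). `train_df` is a hypothetical
# DataFrame with the raw feature columns and the "conversion" label.
pipeline_model = pipeline.fit(train_df)      # this is where training actually happens
scored = pipeline_model.transform(train_df)  # adds "prediction" and "probability"

# COMMAND ----------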