from pyspark.ml.feature import MinMaxScaler, VectorAssembler
from pyspark.sql.functions import udf
from pyspark.sql.types import FloatType


def scaling(dataFrame, inputColName, Min, Max):
    """Scale a numeric column to the range [Min, Max] and append it as a new column."""
    outputColName = "scaled " + inputColName
    # MinMaxScaler only operates on vector columns, so wrap the input column first
    assembler = VectorAssembler(inputCols=[inputColName], outputCol="features")
    assembledDF = assembler.transform(dataFrame)
    scaler = MinMaxScaler(inputCol="features", outputCol=outputColName)
    scaler.setMax(Max).setMin(Min)
    scalerModel = scaler.fit(assembledDF)
    scaledDF = scalerModel.transform(assembledDF).drop("features")
    # Unpack the single-element result vector back into a plain float column
    castVectorToFloat = udf(lambda v: float(v[0]), FloatType())
    scaledDF = scaledDF.withColumn(outputColName, castVectorToFloat(outputColName))
    print("Successfully scaled the column '{0:s}' to the range ({1:f}, {2:f}) and created a new column '{3:s}'."
          .format(inputColName, scaler.getMin(), scaler.getMax(), outputColName))
    return scaledDF
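A minimal usage sketch for the helper above, assuming an active SparkSession named `spark`; the DataFrame and its `age` column are hypothetical:

# Hypothetical example: scale an "age" column to [0, 1].
df = spark.createDataFrame([(23.0,), (41.0,), (65.0,)], ["age"])
scaledDF = scaling(df, "age", 0.0, 1.0)
scaledDF.show()  # adds a "scaled age" column with values 0.0, ~0.43, 1.0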
from pyspark.ml.feature import MinMaxScaler


class mmscaler_wrapper():
    """Wrap MinMaxScaler and remember the original column range so that
    scaled values can be mapped back (denormalized) later."""

    def __init__(self, inputCol, outputCol, s_min=0, s_max=1):
        self.mmModel = MinMaxScaler(inputCol=inputCol, outputCol=outputCol)
        self.mmModel.setMin(s_min)
        self.mmModel.setMax(s_max)
        self.in_column = inputCol
        self.originalMin = None
        self.originalMax = None

    def get_input_col_name(self):
        return self.mmModel.getInputCol()

    def getMax(self):
        return self.mmModel.getMax()

    def getMin(self):
        return self.mmModel.getMin()

    def describe(self):
        print('describe')

    def fit(self, df):
        # Record the pre-scaling range of the (vector-valued) input column
        col = self.mmModel.getInputCol()
        self.originalMin = df.select(col).rdd.flatMap(lambda x: x[0]).min()
        self.originalMax = df.select(col).rdd.flatMap(lambda x: x[0]).max()
        return self.mmModel.fit(df)

    # Denormalize: invert min-max scaling to recover the original value
    def denormalize(self, value):
        v = (value - self.getMin()) / (self.getMax() - self.getMin()) \
            * (self.originalMax - self.originalMin) + self.originalMin
        return v if v is not None else -999  # -999 is the sentinel for missing values

    def denormalize_df(self, df):
        # not implemented yet
        col = self.mmModel.getInputCol()

    def normalize(self, value):
        pass
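A usage sketch for the wrapper, assuming an active SparkSession `spark`; the data and column names are illustrative:

from pyspark.ml.linalg import Vectors

# Illustrative round trip: scale a vector column to [0, 1], then map a value back.
df = spark.createDataFrame([(Vectors.dense([10.0]),),
                            (Vectors.dense([20.0]),),
                            (Vectors.dense([30.0]),)], ["features"])
wrapper = mmscaler_wrapper(inputCol="features", outputCol="scaled")
model = wrapper.fit(df)          # also records the original range [10.0, 30.0]
model.transform(df).show()       # scaled values 0.0, 0.5, 1.0
print(wrapper.denormalize(0.5))  # 20.0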
from pyspark.ml.feature import MinMaxScaler
from pyspark.ml.linalg import Vectors

dataFrame = spark.createDataFrame([
    (0, Vectors.dense([1.0, 0.1, -1.0]),),
    (1, Vectors.dense([2.0, 1.1, 1.0]),),
    (2, Vectors.dense([3.0, 10.1, 3.0]),)
], ["id", "features"])

scaler = MinMaxScaler(inputCol="features", outputCol="scaledFeatures")

# Compute summary statistics and generate MinMaxScalerModel
scalerModel = scaler.fit(dataFrame)

# Rescale each feature to range [min, max]
scaledData = scalerModel.transform(dataFrame)
print("Features scaled to range: [%f, %f]" % (scaler.getMin(), scaler.getMax()))
scaledData.select("features", "scaledFeatures").show()

# COMMAND ----------

### MaxAbsScaler (-1, 1)
from pyspark.ml.feature import MaxAbsScaler
from pyspark.ml.linalg import Vectors

dataFrame = spark.createDataFrame([
    (0, Vectors.dense([1.0, 0.1, -8.0]),),
    (1, Vectors.dense([2.0, 1.0, -4.0]),),
    (2, Vectors.dense([4.0, 10.0, 8.0]),)
], ["id", "features"])

scaler = MaxAbsScaler(inputCol="features", outputCol="scaledFeatures")
scalerModel = scaler.fit(dataFrame)
scaledData = scalerModel.transform(dataFrame)
scaledData.select("features", "scaledFeatures").show()
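MaxAbsScaler divides each feature by its maximum absolute value, so results land in [-1, 1] without shifting the data. A plain-Python check of that rule against the third feature column of the data above:

# Plain-Python check: max-abs scaling of the third feature column [-8.0, -4.0, 8.0]
col = [-8.0, -4.0, 8.0]
max_abs = max(abs(x) for x in col)
print([x / max_abs for x in col])  # [-1.0, -0.5, 1.0]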
from pyspark.ml.feature import MinMaxScaler
from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession

if __name__ == "__main__":
    spark = SparkSession\
        .builder\
        .appName("MinMaxScalerExample")\
        .getOrCreate()

    # $example on$
    dataFrame = spark.createDataFrame([
        (0, Vectors.dense([1.0, 0.1, -1.0]),),
        (1, Vectors.dense([2.0, 1.1, 1.0]),),
        (2, Vectors.dense([3.0, 10.1, 3.0]),)
    ], ["id", "features"])

    scaler = MinMaxScaler(inputCol="features", outputCol="scaledFeatures")

    # Compute summary statistics and generate MinMaxScalerModel
    scalerModel = scaler.fit(dataFrame)

    # Rescale each feature to range [min, max]
    scaledData = scalerModel.transform(dataFrame)
    print("Features scaled to range: [%f, %f]" % (scaler.getMin(), scaler.getMax()))
    scaledData.select("features", "scaledFeatures").show()
    # $example off$

    spark.stop()
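For reference, MinMaxScaler maps each feature per column as x' = (x - E_min) / (E_max - E_min) * (max - min) + min. A plain-Python check of the middle row against the per-column ranges of the example data above:

# Worked check of min-max scaling for the middle row [2.0, 1.1, 1.0],
# using each column's (min, max) from the example data; target range is [0, 1].
ranges = [(1.0, 3.0), (0.1, 10.1), (-1.0, 3.0)]
row = [2.0, 1.1, 1.0]
print([(x - lo) / (hi - lo) for x, (lo, hi) in zip(row, ranges)])  # ~[0.5, 0.1, 0.5]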
########################
## RESCALING DATA SET ##
########################

# Neural networks typically perform better when the input data is preprocessed,
# so scale the feature space to min = 0 and max = 1
scaler = MinMaxScaler(inputCol='features', outputCol='scaledFeatures')
scalerModel = scaler.fit(df)
scaledData = scalerModel.transform(df)
print("Features scaled to range: [%f, %f]" % (scaler.getMin(), scaler.getMax()))
scaledData.select("features", "scaledFeatures").show()

new_df = scaledData.selectExpr(
    "label", "radius_mean", "texture_mean", "perimeter_mean", "area_mean",
    "smoothness_mean", "compactness_mean", "concavity_mean", "concave_points_mean",
    "symmetry_mean", "fractal_dimension_mean", "radius_se", "texture_se",
    "perimeter_se", "area_se", "smoothness_se", "compactness_se", "concavity_se",
    "concave_points_se", "symmetry_se", "fractal_dimension_se", "radius_worst",
    "texture_worst", "perimeter_worst", "area_worst", "smoothness_worst",
    "compactness_worst", "concavity_worst", "concave_points_worst", "symmetry_worst",
    "fractal_dimension_worst", "features as oldFeature", "scaledFeatures as features")
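The `selectExpr` call above keeps all the raw columns, preserves the unscaled vector as `oldFeature`, and promotes the scaled vector to `features`. A toy sketch of the same "X as Y" aliasing pattern, with purely illustrative data and names:

# Toy illustration of selectExpr aliasing: keep a column under a new name
# while promoting another column into its place. Values are made up.
toy = spark.createDataFrame([(1, 10.0, 0.5)], ["label", "features", "scaledFeatures"])
toy.selectExpr("label", "features as oldFeature", "scaledFeatures as features").show()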
## part 2
print('*' * 100)
print('Part 2 - Normalize features between 0 and 1\n')

# assemble the feature values into a vector and add a column containing those vectors
assembler = VectorAssembler().setInputCols(data.columns[1:]).setOutputCol('features')
transformed = assembler.transform(data)

# create the scaler, transform the feature vectors, and add a scaledFeatures column
scaler = MinMaxScaler(inputCol='features', outputCol='scaledFeatures')
scalerModel = scaler.fit(transformed.select('features'))
scaledData = scalerModel.transform(transformed)
print('Features scaled to range: {} to {}'.format(scaler.getMin(), scaler.getMax()))
# scaledData.select('_c0', 'features', 'scaledFeatures').show(10)

# limit the dataset to the label and the scaled vectors
scaledData = scaledData.select('_c0', 'scaledFeatures')

# rename columns
scaledData = scaledData.withColumnRenamed('_c0', 'label').withColumnRenamed(
    'scaledFeatures', 'features')
scaledData.show(5)

####################################################################################
## part 3
print('*' * 100)
print('Part 3 - \n')