Пример #1
0
def run(t1, context_string, configPath='./jsonFormat/ACTCLR.json'):
    """Generate synthetic Spark DataFrames described by a JSON profile file.

    Parameters
    ----------
    t1, context_string :
        Unused; kept so existing callers keep working.
    configPath : str
        Path to a JSON file whose "tables" mapping gives, per table,
        "property.lines" (row count) and a "field" mapping of
        column name -> {"type", "constraint", "createMod"}.

    Returns
    -------
    list
        A one-element list with the generated DataFrame for table "ACTCLR".
    """
    spark = SQLContext(SparkContext.getOrCreate())

    # Load the table profiles from the config file.
    with open(configPath) as cfgPath:
        jsonobj = json.load(cfgPath)
    print("jsonobj", jsonobj)

    tables = {}
    for name, prof in jsonobj["tables"].items():
        print("prof", name, prof)

        # Start from a single-column "id" DataFrame; every field value is
        # derived from the row id by a generator UDF below.
        df = spark.range(prof["property"]["lines"])

        for fieldName, fieldProf in prof["field"].items():
            # Strip surrounding literal quotes, e.g. '"VARCHAR(10)"' -> 'VARCHAR(10)'.
            if fieldProf["type"].startswith('"') and fieldProf["type"].endswith('"'):
                fieldProf["type"] = fieldProf["type"][1:-1]

            # Map the declared SQL type to a Spark type and a generator family.
            declared = fieldProf["type"].upper()
            if declared.startswith(("VARCHAR", "CHAR")):
                t_type, baseType = StringType, "String"
            elif declared.startswith("DECIMAL"):
                t_type, baseType = FloatType, "Float"
            elif declared.startswith("SMALLINT"):
                t_type, baseType = IntegerType, "Integer"
            elif declared.startswith("DATE"):
                # BUG FIX: was DataType (the abstract base class, not a usable
                # column type); DATE columns need DateType.
                t_type, baseType = DateType, "Date"
            else:
                # Unknown declarations fall back to strings.
                t_type, baseType = StringType, "String"

            # "createMod" names a specific generator on excuteFunc; an empty
            # value selects the per-base-type default (e.g. "StringMethod").
            if fieldProf["createMod"] == "":
                generator = getattr(excuteFunc, baseType + "Method")
            else:
                generator = getattr(
                    excuteFunc, fieldProf["createMod"].upper() + "_Method")
            udf_func = udf(
                generator(constraint=fieldProf["constraint"]), t_type())

            df = df.withColumn(fieldName, udf_func("id"))

        tables[name] = df

    # Preview every generated table.
    for table_df in tables.values():
        table_df.show(n=100, truncate=False)

    return [tables["ACTCLR"]]
Пример #2
0
# Demo: an RDD of Row objects carrying complex column types — lists, dicts,
# nested Rows, and timestamps — converted to a DataFrame.
rows = [
    Row(col_list=[1, 2, 3],
        col_dict={"k1": 0},
        col_row=Row(a=10, b=20, c=30),
        col_time=datetime(2014, 8, 1, 14, 1, 5)),
    Row(col_list=[1, 2, 3, 4, 5],
        col_dict={"k1": 0, "k2": 1},
        col_row=Row(a=40, b=50, c=60),
        col_time=datetime(2014, 8, 2, 14, 1, 6)),
    Row(col_list=[1, 2, 3, 4, 5, 6, 7],
        col_dict={"k1": 0, "k2": 1, "k3": 2},
        col_row=Row(a=70, b=80, c=90),
        col_time=datetime(2014, 8, 3, 14, 1, 7)),
]
complex_data = sc.parallelize(rows)

complex_data_df = complex_data.toDF()
complex_data_df.show()

# A SQLContext built on the same SparkContext.
sqlContext = SQLContext(sc)
print(sqlContext)

# range() yields a single-column ("id") DataFrame of 5 rows.
df = sqlContext.range(5)
print(df)
df.show()
print(df.count())

# DataFrames from plain tuples: first with default column names, then named.
data = [("Alice", 50), ("Bob", 80), ("Charlee", 75)]
sqlContext.createDataFrame(data).show()
sqlContext.createDataFrame(data, ['Name', 'Score']).show()

# Rebind complex_data as raw tuples, one per person, mixing scalar,
# collection, Row, and datetime columns.
complex_data = [
    (1.0, 10, "Alice", True,
     [1, 2, 3], {"k1": 0},
     Row(a=1, b=2, c=3), datetime(2014, 8, 1, 14, 1, 5)),
    (2.0, 20, "Bob", True,
     [1, 2, 3, 4, 5], {"k1": 0, "k2": 1},
     Row(a=1, b=2, c=3), datetime(2014, 8, 1, 14, 1, 5)),
    (3.0, 30, "Charlee", False,
     [1, 2, 3, 4, 5, 6], {"k1": 0, "k2": 1, "k3": 2},
     Row(a=1, b=2, c=3), datetime(2014, 8, 1, 14, 1, 5)),
]