# Example #1
0
# Set up the Spark entry points (conf, sc, spark) for a "local" master run.
# NOTE(review): "spark.executor.memory" is kept exactly as in the original;
# with a local master this setting may not be the one that governs available
# memory -- confirm whether "spark.driver.memory" was intended.
from pyspark import SparkConf, SparkContext
conf = (
    SparkConf()
    .setMaster("local")
    .setAppName("My app")
    .set("spark.executor.memory", "1g")
)
# getOrCreate() reuses an already-running context instead of raising
# "Cannot run multiple SparkContexts at once" when this code is executed
# again in the same interpreter (notebook cell, REPL, test session).
sc = SparkContext.getOrCreate(conf=conf)

# Begin a session on top of the context through the documented builder API;
# the builder picks up the context created above rather than starting another.
from pyspark.sql.session import SparkSession
spark = SparkSession.builder.config(conf=conf).getOrCreate()

# Build a DataFrame from range(1000) with its single column named "number"
myRange = spark.range(1000).toDF("number")

# Keep only the even values (filter() is the method that where() aliases)
divisBy2 = myRange.filter("number % 2 = 0")
# Action: triggers the job; the returned count is not stored or printed here
divisBy2.count()

# Register the DataFrame as the temp view "myrange", then read it back via SQL
myRange.createOrReplaceTempView("myrange")
sqlExample = spark.sql("SELECT * from myrange")
sqlExample.show()

# Load the 2015 flight summary CSV with the "header" and "inferSchema"
# reader options switched on
flightData2015 = (
    spark.read
    .option("inferSchema", "true")
    .option("header", "true")
    .csv("c:\\projects\\Spark-TDG\\data\\flight-data\\csv\\2015-summary.csv")
)

# Action: fetch the first three rows (the result is not stored or printed here)
flightData2015.take(3)

# Print the plan Spark would use to order the data by the "count" column
# (orderBy() is the method alias of sort())
flightData2015.orderBy("count").explain()