# Introductory PySpark walkthrough: configure a local Spark context, open a
# session on it, then run a few small DataFrame and SQL examples.
#
# NOTE: count() and take() below are bare expressions. Their results are
# displayed in an interactive shell/notebook but discarded when this file is
# run as a plain script.

# Set up a context: local master with 1 GB of executor memory, configured
# through SparkConf.
from pyspark import SparkConf, SparkContext

conf = (
    SparkConf()
    .setMaster("local")
    .setAppName("My app")
    .set("spark.executor.memory", "1g")
)
sc = SparkContext(conf=conf)

# Begin a session using the context created above.
from pyspark.sql.session import SparkSession

spark = SparkSession(sc)

# Load a 1000-row range into a single-column DataFrame named "number".
myRange = spark.range(1000).toDF("number")

# Keep only the rows whose number is divisible by 2, then count them.
divisBy2 = myRange.where("number % 2 = 0")
divisBy2.count()

# Register the range as a temporary view so it can be queried with SQL.
myRange.createOrReplaceTempView("myrange")
sqlExample = spark.sql("SELECT * from myrange")
sqlExample.show()

# Read the 2015 flight summary CSV, treating the first line as a header and
# letting Spark infer the column types.
# NOTE(review): the path is an absolute Windows path specific to one machine;
# adjust it to wherever the Spark-TDG data lives locally.
flightData2015 = (
    spark.read
    .option("inferSchema", "true")
    .option("header", "true")
    .csv("c:\\projects\\Spark-TDG\\data\\flight-data\\csv\\2015-summary.csv")
)
flightData2015.take(3)

# Print the plan Spark would use to sort the data by its "count" column.
flightData2015.sort("count").explain()