def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): """Creates a new SQLContext. >>> from datetime import datetime >>> sqlContext = SQLContext(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), ... time=datetime(2014, 8, 1, 14, 1, 5))]) >>> df = allTypes.toDF() >>> df.createOrReplaceTempView("allTypes") >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' ... 'from allTypes where b and i > 0').collect() [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm if sparkSession is None: sparkSession = SparkSession.builder.getOrCreate() if jsqlContext is None: jsqlContext = sparkSession._jwrapped self.sparkSession = sparkSession self._jsqlContext = jsqlContext _monkey_patch_RDD(self.sparkSession) install_exception_handler() if SQLContext._instantiatedContext is None: SQLContext._instantiatedContext = self
def __init__(self, sparkContext, jsparkSession=None): """Creates a new SparkSession. >>> from datetime import datetime >>> spark = SparkSession(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), ... time=datetime(2014, 8, 1, 14, 1, 5))]) >>> df = allTypes.toDF() >>> df.registerTempTable("allTypes") >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' ... 'from allTypes where b and i > 0').collect() [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm if jsparkSession is None: jsparkSession = self._jvm.SparkSession(self._jsc.sc()) self._jsparkSession = jsparkSession self._jwrapped = self._jsparkSession.wrapped() self._wrapped = SQLContext(self._sc, self, self._jwrapped) _monkey_patch_RDD(self) install_exception_handler() if SparkSession._instantiatedContext is None: SparkSession._instantiatedContext = self
def __init__(self, sparkContext, jsparkSession=None): """Creates a new SparkSession. >>> from datetime import datetime >>> spark = SparkSession(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), ... time=datetime(2014, 8, 1, 14, 1, 5))]) >>> df = allTypes.toDF() >>> df.createOrReplaceTempView("allTypes") >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' ... 'from allTypes where b and i > 0').collect() [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm if jsparkSession is None: jsparkSession = self._jvm.SparkSession(self._jsc.sc()) self._jsparkSession = jsparkSession self._jwrapped = self._jsparkSession.sqlContext() self._wrapped = SQLContext(self._sc, self, self._jwrapped) _monkey_patch_RDD(self) install_exception_handler() # If we had an instantiated SparkSession attached with a SparkContext # which is stopped now, we need to renew the instantiated SparkSession. # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate. if SparkSession._instantiatedSession is None \ or SparkSession._instantiatedSession._sc._jsc is None: SparkSession._instantiatedSession = self
def __init__(self, sparkContext, sqlContext=None): """Creates a new SQLContext. >>> from datetime import datetime >>> sqlContext = SQLContext(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), ... time=datetime(2014, 8, 1, 14, 1, 5))]) >>> df = allTypes.toDF() >>> df.registerTempTable("allTypes") >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' ... 'from allTypes where b and i > 0').collect() [Row(_c0=2, _c1=2.0, _c2=False, _c3=2, _c4=0, \ time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] >>> df.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm self._scala_SQLContext = sqlContext _monkey_patch_RDD(self) install_exception_handler() if SQLContext._instantiatedContext is None: SQLContext._instantiatedContext = self
def __init__( self, sparkContext: SparkContext, sparkSession: Optional[SparkSession] = None, jsqlContext: Optional[JavaObject] = None, ): if sparkSession is None: warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", FutureWarning, ) self._sc = sparkContext self._jsc = self._sc._jsc # type: ignore[attr-defined] self._jvm = self._sc._jvm # type: ignore[attr-defined] if sparkSession is None: sparkSession = SparkSession.builder.getOrCreate() if jsqlContext is None: jsqlContext = sparkSession._jwrapped self.sparkSession = sparkSession self._jsqlContext = jsqlContext _monkey_patch_RDD(self.sparkSession) install_exception_handler() if (SQLContext._instantiatedContext is None or SQLContext._instantiatedContext._sc._jsc is None # type: ignore[attr-defined] ): SQLContext._instantiatedContext = self
def __init__(self, sparkContext, jsparkSession=None): from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm if jsparkSession is None: if self._jvm.SparkSession.getDefaultSession().isDefined() \ and not self._jvm.SparkSession.getDefaultSession().get() \ .sparkContext().isStopped(): jsparkSession = self._jvm.SparkSession.getDefaultSession().get( ) else: jsparkSession = self._jvm.SparkSession(self._jsc.sc()) self._jsparkSession = jsparkSession self._jwrapped = self._jsparkSession.sqlContext() self._wrapped = SQLContext(self._sc, self, self._jwrapped) _monkey_patch_RDD(self) install_exception_handler() # If we had an instantiated SparkSession attached with a SparkContext # which is stopped now, we need to renew the instantiated SparkSession. # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate. if SparkSession._instantiatedSession is None \ or SparkSession._instantiatedSession._sc._jsc is None: SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) self._jvm.SparkSession.setActiveSession(self._jsparkSession)
def __init__( self, sparkContext: SparkContext, jsparkSession: Optional[JavaObject] = None, options: Optional[Dict[str, Any]] = {}, ): from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc # type: ignore[attr-defined] self._jvm = self._sc._jvm # type: ignore[attr-defined] if jsparkSession is None: if (self._jvm.SparkSession.getDefaultSession().isDefined() and not self._jvm.SparkSession.getDefaultSession().get( ).sparkContext().isStopped()): jsparkSession = self._jvm.SparkSession.getDefaultSession().get( ) else: jsparkSession = self._jvm.SparkSession(self._jsc.sc(), options) self._jsparkSession = jsparkSession self._jwrapped = self._jsparkSession.sqlContext() self._wrapped = SQLContext(self._sc, self, self._jwrapped) _monkey_patch_RDD(self) install_exception_handler() # If we had an instantiated SparkSession attached with a SparkContext # which is stopped now, we need to renew the instantiated SparkSession. # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate. if (SparkSession._instantiatedSession is None or SparkSession._instantiatedSession._sc._jsc is None # type: ignore[attr-defined] ): SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) self._jvm.SparkSession.setActiveSession(self._jsparkSession)
def __init__( self, sparkContext: SparkContext, jsparkSession: Optional[JavaObject] = None, options: Dict[str, Any] = {}, ): self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm assert self._jvm is not None if jsparkSession is None: if ( self._jvm.SparkSession.getDefaultSession().isDefined() and not self._jvm.SparkSession.getDefaultSession().get().sparkContext().isStopped() ): jsparkSession = self._jvm.SparkSession.getDefaultSession().get() getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings( jsparkSession, options ) else: jsparkSession = self._jvm.SparkSession(self._jsc.sc(), options) else: getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings( jsparkSession, options ) self._jsparkSession = jsparkSession _monkey_patch_RDD(self) install_exception_handler() # If we had an instantiated SparkSession attached with a SparkContext # which is stopped now, we need to renew the instantiated SparkSession. # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate. if ( SparkSession._instantiatedSession is None or SparkSession._instantiatedSession._sc._jsc is None ): SparkSession._instantiatedSession = self SparkSession._activeSession = self assert self._jvm is not None self._jvm.SparkSession.setDefaultSession(self._jsparkSession) self._jvm.SparkSession.setActiveSession(self._jsparkSession)
def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): if sparkSession is None: warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", FutureWarning) self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm if sparkSession is None: sparkSession = SparkSession.builder.getOrCreate() if jsqlContext is None: jsqlContext = sparkSession._jwrapped self.sparkSession = sparkSession self._jsqlContext = jsqlContext _monkey_patch_RDD(self.sparkSession) install_exception_handler() if (SQLContext._instantiatedContext is None or SQLContext._instantiatedContext._sc._jsc is None): SQLContext._instantiatedContext = self