def __init__(
    self,
    sparkContext: SparkContext,
    sparkSession: Optional[SparkSession] = None,
    jsqlContext: Optional[JavaObject] = None,
):
    if sparkSession is None:
        warnings.warn(
            "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
            FutureWarning,
        )

    self._sc = sparkContext
    self._jsc = self._sc._jsc
    self._jvm = self._sc._jvm

    if sparkSession is None:
        sparkSession = SparkSession._getActiveSessionOrCreate()
    if jsqlContext is None:
        jsqlContext = sparkSession._jsparkSession.sqlContext()

    self.sparkSession = sparkSession
    self._jsqlContext = jsqlContext
    _monkey_patch_RDD(self.sparkSession)
    install_exception_handler()
    # Cache the first live instance so it can be reused by getOrCreate-style paths.
    if (
        SQLContext._instantiatedContext is None
        or SQLContext._instantiatedContext._sc._jsc is None
    ):
        SQLContext._instantiatedContext = self
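# Hedged usage sketch: SQLContext is deprecated, so new code should go through
# SparkSession directly. The name `sc` below is illustrative, not from this module.
#
#     sc = SparkContext.getOrCreate()
#     sqlContext = SQLContext(sc)                   # emits FutureWarning
#     spark = SparkSession.builder.getOrCreate()    # preferred replacement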
def schema(self, schema: Union[StructType, str]) -> "DataStreamReader":
    """Specifies the input schema.

    Some data sources (e.g. JSON) can infer the input schema automatically from data.
    By specifying the schema here, the underlying data source can skip the schema
    inference step, and thus speed up data loading.

    .. versionadded:: 2.0.0

    Parameters
    ----------
    schema : :class:`pyspark.sql.types.StructType` or str
        a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string
        (For example ``col0 INT, col1 DOUBLE``).

    Notes
    -----
    This API is evolving.

    Examples
    --------
    >>> s = spark.readStream.schema(sdf_schema)
    >>> s = spark.readStream.schema("col0 INT, col1 DOUBLE")
    """
    from pyspark.sql import SparkSession

    spark = SparkSession._getActiveSessionOrCreate()
    if isinstance(schema, StructType):
        jschema = spark._jsparkSession.parseDataType(schema.json())
        self._jreader = self._jreader.schema(jschema)
    elif isinstance(schema, str):
        self._jreader = self._jreader.schema(schema)
    else:
        raise TypeError("schema should be StructType or string")
    return self
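# A minimal end-to-end sketch (hedged) showing that the StructType form and the
# DDL-string form of schema() are interchangeable; the text source and path
# mirror the doctest fixtures used elsewhere in this module.
#
#     from pyspark.sql.types import StructType, StructField, StringType
#     sdf_schema = StructType([StructField("data", StringType(), True)])
#     sdf1 = spark.readStream.schema(sdf_schema).format("text").load(
#         "python/test_support/sql/streaming")
#     sdf2 = spark.readStream.schema("data STRING").format("text").load(
#         "python/test_support/sql/streaming")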
def _test() -> None:
    import doctest
    import os
    import sys
    import tempfile

    from py4j.protocol import Py4JError

    from pyspark.sql import SparkSession
    from pyspark.sql.types import StringType, StructField, StructType
    import pyspark.sql.streaming.readwriter

    os.chdir(os.environ["SPARK_HOME"])

    globs = pyspark.sql.streaming.readwriter.__dict__.copy()
    try:
        spark = SparkSession._getActiveSessionOrCreate()
    except Py4JError:
        # Fall back to building a fresh session when no JVM-backed session exists.
        spark = SparkSession.builder.getOrCreate()

    globs["tempfile"] = tempfile
    globs["spark"] = spark
    globs["sdf"] = spark.readStream.format("text").load("python/test_support/sql/streaming")
    globs["sdf_schema"] = StructType([StructField("data", StringType(), True)])

    (failure_count, test_count) = doctest.testmod(
        pyspark.sql.streaming.readwriter,
        globs=globs,
        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF,
    )
    globs["spark"].stop()
    if failure_count:
        sys.exit(-1)
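# Mirroring the usual PySpark module layout (an assumption for this excerpt),
# the doctest runner would be wired to direct script execution at the bottom of
# the module:
#
#     if __name__ == "__main__":
#         _test()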
def _create_judf(self, func: Callable[..., Any]) -> JavaObject:
    from pyspark.sql import SparkSession

    spark = SparkSession._getActiveSessionOrCreate()
    sc = spark.sparkContext

    # Wrap the Python callable for shipping to the JVM and parse the declared
    # return type into a JVM DataType, then build the JVM-side UDF wrapper.
    wrapped_func = _wrap_function(sc, func, self.returnType)
    jdt = spark._jsparkSession.parseDataType(self.returnType.json())
    judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(  # type: ignore[attr-defined]
        self._name, wrapped_func, jdt, self.evalType, self.deterministic
    )
    return judf
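# Hedged usage sketch: _create_judf is the internal machinery behind
# pyspark.sql.functions.udf; `slen` and `df` below are illustrative names.
#
#     from pyspark.sql.functions import udf
#     from pyspark.sql.types import IntegerType
#     slen = udf(lambda s: len(s), IntegerType())  # the judf is built lazily, on first use
#     df.select(slen(df.name)).show()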
@classmethod
def _get_or_create(
    cls: Type["SQLContext"], sc: SparkContext, **static_conf: Any
) -> "SQLContext":
    if (
        cls._instantiatedContext is None
        or SQLContext._instantiatedContext._sc._jsc is None  # type: ignore[union-attr]
    ):
        assert sc._jvm is not None
        # There can be only one running Spark context. That will automatically
        # be used in the Spark session internally.
        session = SparkSession._getActiveSessionOrCreate(**static_conf)
        cls(sc, session, session._jsparkSession.sqlContext())
    return cast(SQLContext, cls._instantiatedContext)
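# Hedged sketch of the caching contract: repeated calls while a context is live
# return the same instance (`sc` is an illustrative SparkContext).
#
#     ctx1 = SQLContext._get_or_create(sc)
#     ctx2 = SQLContext._get_or_create(sc)
#     assert ctx1 is ctx2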
def __init__(self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject] = None):
    warnings.warn(
        "HiveContext is deprecated in Spark 2.0.0. Please use "
        + "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
        FutureWarning,
    )
    static_conf = {}
    if jhiveContext is None:
        # A HiveContext requires the Hive catalog implementation, not the
        # default in-memory one.
        static_conf = {"spark.sql.catalogImplementation": "hive"}
    # There can be only one running Spark context. That will automatically
    # be used in the Spark session internally.
    session = SparkSession._getActiveSessionOrCreate(**static_conf)
    SQLContext.__init__(self, sparkContext, session, jhiveContext)
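# Migration sketch (hedged), following the recommendation in the warning above:
#
#     spark = SparkSession.builder.enableHiveSupport().getOrCreate()
#     spark.sql("SHOW TABLES").show()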