Example no. 1
0
    def create(self) -> SparkSessionLazy:
        """Return a lazily-initialized SparkSession wrapper.

        The underlying SparkSession is only built — via the default
        builder — when the SparkSessionLazy value is first resolved.
        """
        def _build_session():
            # Defer all Spark startup cost until first use.
            return SparkSession.builder.getOrCreate()

        return SparkSessionLazy(_build_session)
    def create(self) -> SparkSessionLazy:
        """Wrap the notebook's existing SparkSession after applying configurators.

        Reads the ``spark`` object from the IPython user namespace, runs every
        registered configurator against it, writes it back, and returns it
        behind a SparkSessionLazy facade.
        """
        spark: SparkSession = IPython.get_ipython().user_ns['spark']

        # Mutate the shared session in place via each configurator.
        for session_configurator in self.__configurators:
            session_configurator.configure(spark)

        # Publish the (same) session object back into the notebook namespace.
        IPython.get_ipython().user_ns['spark'] = spark

        return SparkSessionLazy(lambda: spark)
    def create(self) -> SparkSessionLazy:
        """Wrap the notebook's existing SparkSession after applying extra config.

        Reads the ``spark`` object from the IPython user namespace, pushes every
        extra configuration entry onto it, writes it back, and returns it behind
        a SparkSessionLazy facade.
        """
        spark: SparkSession = IPython.get_ipython().user_ns['spark']

        # Apply each extra config entry to the live session.
        for key, value in self.__extraConfig.items():
            spark.conf.set(key, value)

        # Publish the (same) session object back into the notebook namespace.
        IPython.get_ipython().user_ns['spark'] = spark

        return SparkSessionLazy(lambda: spark)
    def create(self) -> SparkSessionLazy:
        """Return a lazy SparkSession that is configured on first resolution.

        The session is built with the default builder and each registered
        configurator is applied to it, all deferred until first use.
        """
        def _build_configured_session():
            session = SparkSession.builder.getOrCreate()

            # Run every registered configurator against the fresh session.
            for session_configurator in self.__configurators:
                session_configurator.configure(session)

            return session

        return SparkSessionLazy(_build_configured_session)
Example no. 5
0
    def create(self) -> SparkSessionLazy:
        """Wrap the notebook's existing SparkSession after applying configurators.

        IPython is imported lazily so the module stays importable outside a
        notebook environment.
        """
        import IPython  # pylint: disable = import-error, import-outside-toplevel

        spark: SparkSession = IPython.get_ipython().user_ns['spark']

        # Mutate the shared session in place via each configurator.
        for session_configurator in self.__configurators:
            session_configurator.configure(spark)

        # Publish the (same) session object back into the notebook namespace.
        IPython.get_ipython().user_ns['spark'] = spark

        return SparkSessionLazy(lambda: spark)
Example no. 6
0
    def create(self) -> SparkSessionLazy:
        """Wrap the notebook's existing SparkSession after applying configurators.

        IPython is imported lazily so the module stays importable outside a
        notebook environment.
        """
        # pylint: disable=import-outside-toplevel
        import IPython

        spark: SparkSession = IPython.get_ipython().user_ns["spark"]

        # Mutate the shared session in place via each configurator.
        for session_configurator in self.__configurators:
            session_configurator.configure(spark)

        # Publish the (same) session object back into the notebook namespace.
        IPython.get_ipython().user_ns["spark"] = spark

        return SparkSessionLazy(lambda: spark)
    def create(self) -> SparkSessionLazy:
        """Return a lazy SparkSession configured for Databricks Connect.

        All Databricks Connect settings plus any extra config entries are
        applied to a SparkConf before the session is built; the build is
        deferred until the SparkSessionLazy value is first resolved.

        Raises (on first resolution):
            Exception: if a required Databricks Connect setting
                (address, token, cluster id, or port) is missing.
        """
        def createLazy():
            # Fail fast on missing required settings, consistent with the
            # other Databricks Connect factories. Kept inside the lazy
            # builder so the eager path never raises.
            if not self.__address:
                raise Exception('Databricks workspace address not set')

            if not self.__token:
                raise Exception('Databricks workspace token not set')

            if not self.__clusterId:
                raise Exception('Databricks cluster not set')

            if not self.__port:
                raise Exception('Databricks connect port not set')

            # Databricks Connect configuration must be set before calling getOrCreate()
            conf = SparkConf()
            conf.set('spark.databricks.service.address', self.__address)
            conf.set('spark.databricks.service.token', self.__token)
            conf.set('spark.databricks.service.clusterId', self.__clusterId)

            if self.__orgId is not None:
                conf.set('spark.databricks.service.orgId', self.__orgId)

            conf.set('spark.databricks.service.port', self.__port)

            if self.__bindAddress is not None:
                conf.set('spark.driver.bindAddress', self.__bindAddress)

            for k, v in self.__extraConfig.items():
                conf.set(k, v)

            return SparkSession.builder.config(conf=conf).getOrCreate()

        return SparkSessionLazy(createLazy)
    def create(self) -> SparkSessionLazy:
        """Return a lazy SparkSession configured for Databricks Connect.

        Required settings are validated and the session is built and run
        through the registered configurators, all deferred until the
        SparkSessionLazy value is first resolved.

        Raises (on first resolution):
            Exception: if a required Databricks Connect setting
                (address, token, cluster id, or port) is missing.
        """
        def _build():
            # Validate required settings in a fixed order; the first
            # missing one raises, matching the original check sequence.
            required = (
                (self.__config.address, "Databricks workspace address not set"),
                (self.__config.token, "Databricks workspace token not set"),
                (self.__config.cluster_id, "Databricks cluster not set"),
                (self.__config.port, "Databricks connect port not set"),
            )
            for value, message in required:
                if not value:
                    raise Exception(message)

            # Databricks Connect configuration must be set before calling getOrCreate()
            conf = SparkConf()
            conf.set("spark.databricks.service.address", self.__config.address)
            conf.set("spark.databricks.service.token", self.__config.token)
            conf.set("spark.databricks.service.clusterId", self.__config.cluster_id)

            if self.__config.org_id is not None:
                conf.set("spark.databricks.service.orgId", self.__config.org_id)

            conf.set("spark.databricks.service.port", self.__config.port)

            if self.__config.driver_bind_address is not None:
                conf.set("spark.driver.bindAddress", self.__config.driver_bind_address)

            session = SparkSession.builder.config(conf=conf).getOrCreate()

            # Run every registered configurator against the fresh session.
            for session_configurator in self.__configurators:
                session_configurator.configure(session)

            return session

        return SparkSessionLazy(_build)
    def create(self) -> SparkSessionLazy:
        """Return a lazy SparkSession configured for Databricks Connect.

        Required settings are validated eagerly (this method raises
        immediately when one is missing); building the session and running
        the registered configurators is deferred until the SparkSessionLazy
        value is first resolved.

        Raises:
            Exception: if a required Databricks Connect setting
                (address, token, cluster id, or port) is missing.
        """
        # Validate required settings in a fixed order; the first missing
        # one raises, matching the original check sequence.
        required = (
            (self.__address, 'Databricks workspace address not set'),
            (self.__token, 'Databricks workspace token not set'),
            (self.__clusterId, 'Databricks cluster not set'),
            (self.__port, 'Databricks connect port not set'),
        )
        for value, message in required:
            if not value:
                raise Exception(message)

        def createLazy():
            # Databricks Connect configuration must be set before calling getOrCreate()
            conf = SparkConf()
            conf.set('spark.databricks.service.address', self.__address)
            conf.set('spark.databricks.service.token', self.__token)
            conf.set('spark.databricks.service.clusterId', self.__clusterId)

            if self.__orgId is not None:
                conf.set('spark.databricks.service.orgId', self.__orgId)

            conf.set('spark.databricks.service.port', self.__port)

            if self.__bindAddress is not None:
                conf.set('spark.driver.bindAddress', self.__bindAddress)

            session = SparkSession.builder.config(conf=conf).getOrCreate()

            # Run every registered configurator against the fresh session.
            for session_configurator in self.__configurators:
                session_configurator.configure(session)

            return session

        return SparkSessionLazy(createLazy)