import logging
import os

import pyspark
from packaging.version import Version

_logger = logging.getLogger(__name__)

# `get_spark_session` and `get_mleap_jars` are project test helpers imported elsewhere.


def spark_context():
    conf = pyspark.SparkConf()
    conf.set(key="spark.jars.packages", value=get_mleap_jars())
    # Exclude `net.sourceforge.f2j` to avoid `java.io.FileNotFoundException`
    conf.set(key="spark.jars.excludes", value="net.sourceforge.f2j:arpack_combined_all")
    spark_session = get_spark_session(conf)
    return spark_session.sparkContext

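# `get_mleap_jars` is not shown in these snippets. A plausible sketch, assuming it
# returns the same comma-separated MLeap Maven coordinates that the variants below
# hard-code (the package names and `0.12.0` version are illustrative, not confirmed):
def get_mleap_jars(mleap_version="0.12.0"):
    packages = [
        f"ml.combust.mleap:mleap-spark-base_2.11:{mleap_version}",
        f"ml.combust.mleap:mleap-spark_2.11:{mleap_version}",
    ]
    return ",".join(packages)
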
def spark_context():
    if Version(pyspark.__version__) < Version("3.1"):
        # A workaround for this issue:
        # https://stackoverflow.com/questions/62109276/errorjava-lang-unsupportedoperationexception-for-pyspark-pandas-udf-documenta
        spark_home = os.environ.get("SPARK_HOME", os.path.dirname(pyspark.__file__))
        conf_dir = os.path.join(spark_home, "conf")
        os.makedirs(conf_dir, exist_ok=True)
        with open(os.path.join(conf_dir, "spark-defaults.conf"), "w") as f:
            f.write(
                """
spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
"""
            )
    conf = pyspark.SparkConf()
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            # Re-raise on the final attempt; otherwise log the failure and retry.
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )

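# For context: the same JVM flags can also be set programmatically on a SparkConf
# (these are standard Spark config keys). A minimal sketch; note the snippet above
# writes spark-defaults.conf instead so that the flags are in place before the
# driver JVM starts on the affected Spark versions:
def _netty_workaround_conf():
    conf = pyspark.SparkConf()
    conf.set("spark.driver.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true")
    conf.set("spark.executor.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true")
    return conf
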
def spark_context():
    conf = pyspark.SparkConf()
    conf.set(
        key="spark.jars.packages",
        value=(
            "ml.combust.mleap:mleap-spark-base_2.11:0.12.0,"
            "ml.combust.mleap:mleap-spark_2.11:0.12.0"
        ),
    )
    spark_session = get_spark_session(conf)
    return spark_session.sparkContext

def _get_spark_session_with_retry(max_tries=3):
    conf = pyspark.SparkConf()
    for num_tries in range(max_tries):
        try:
            return get_spark_session(conf)
        except Exception:
            # Re-raise on the final attempt; otherwise log the failure and retry.
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )

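# `get_spark_session` itself is not shown. A minimal sketch of what it plausibly
# does, assuming a local-mode session built from the given conf (only documented
# SparkSession builder APIs are used; the `local[*]` master is an assumption):
from pyspark.sql import SparkSession


def get_spark_session(conf):
    return SparkSession.builder.config(conf=conf).master("local[*]").getOrCreate()
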
def spark_context():
    conf = pyspark.SparkConf()
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            # Re-raise on the final attempt; otherwise log the failure and retry.
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )

def spark_context():
    conf = pyspark.SparkConf()
    conf.set(
        key="spark.jars.packages",
        value=(
            "ml.combust.mleap:mleap-spark-base_2.11:0.12.0,"
            "ml.combust.mleap:mleap-spark_2.11:0.12.0"
        ),
    )
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            # Re-raise on the final attempt; otherwise log the failure and retry.
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )

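# These functions read like pytest fixtures. A minimal usage sketch, assuming
# pytest (the fixture registration and the test below are illustrative additions,
# not part of the original snippets):
import pytest


@pytest.fixture(scope="module")
def sc():
    spark = get_spark_session(pyspark.SparkConf())
    yield spark.sparkContext
    spark.stop()


def test_parallelize_count(sc):
    assert sc.parallelize([1, 2, 3]).count() == 3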