Example #1
def init_spark_on_k8s(master,
                      container_image,
                      num_executors,
                      executor_cores,
                      executor_memory="2g",
                      driver_memory="1g",
                      driver_cores=4,
                      extra_executor_memory_for_ray=None,
                      extra_python_lib=None,
                      spark_log_level="WARN",
                      redirect_spark_log=True,
                      jars=None,
                      conf=None,
                      python_location=None):
    """
    Create a SparkContext with Analytics Zoo configurations on a Kubernetes cluster in k8s client
    mode. It is recommended to use the Docker image intelanalytics/hyperzoo:latest.
    You can refer to https://github.com/intel-analytics/analytics-zoo/tree/master/docker/hyperzoo
    to build your own Docker image.

    :param master: The master address of your k8s cluster.
    :param container_image: The name of the docker container image for Spark executors.
           For example, intelanalytics/hyperzoo:latest
    :param num_executors: The number of Spark executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be '2g'.
    :param driver_cores: The number of cores for the Spark driver. Default to be 4.
    :param driver_memory: The memory for the Spark driver. Default to be '1g'.
    :param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
    :param extra_python_lib: Extra python files or packages needed for distribution.
           Default to be None.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
    :param jars: Comma-separated list of jars to be included on driver and executor's classpath.
           Default to be None.
    :param conf: You can append extra conf for Spark in key-value format,
           e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.
    :param python_location: The path to your running Python executable. If not specified, the
           default Python interpreter in effect would be used.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    sc = runner.init_spark_on_k8s(
        master=master,
        container_image=container_image,
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_memory=driver_memory,
        driver_cores=driver_cores,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        jars=jars,
        conf=conf,
        python_location=python_location)
    return sc
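
A minimal usage sketch for the function above (an illustration, not from the source): it assumes init_spark_on_k8s is in scope and that a Kubernetes API server and the hyperzoo image are reachable; the master URL and resource sizes below are placeholders.

# Hypothetical call to init_spark_on_k8s defined above; master URL and sizes are placeholders.
sc = init_spark_on_k8s(master="k8s://https://127.0.0.1:8443",
                       container_image="intelanalytics/hyperzoo:latest",
                       num_executors=2,
                       executor_cores=4,
                       executor_memory="4g",
                       conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"})
print(sc.applicationId)  # the return value is a regular SparkContext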
Example #2
def init_spark_on_k8s(master,
                      container_image,
                      num_executors,
                      executor_cores,
                      executor_memory="2g",
                      driver_memory="1g",
                      driver_cores=4,
                      extra_executor_memory_for_ray=None,
                      extra_python_lib=None,
                      spark_log_level="WARN",
                      redirect_spark_log=True,
                      jars=None,
                      conf=None,
                      python_location=None):
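    """
    Same as Example #1: create a SparkContext with Analytics Zoo configurations on a Kubernetes
    cluster in k8s client mode. See Example #1 above for the detailed parameter descriptions.
    """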

    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    sc = runner.init_spark_on_k8s(
        master=master,
        container_image=container_image,
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_memory=driver_memory,
        driver_cores=driver_cores,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        jars=jars,
        conf=conf,
        python_location=python_location)
    return sc
Example #3
def init_spark_on_yarn(hadoop_conf,
                       conda_name,
                       num_executor,
                       executor_cores,
                       executor_memory="2g",
                       driver_memory="1g",
                       driver_cores=4,
                       extra_executor_memory_for_ray=None,
                       extra_python_lib=None,
                       penv_archive=None,
                       hadoop_user_name="root",
                       spark_yarn_archive=None,
                       spark_log_level="WARN",
                       redirect_spark_log=True,
                       jars=None,
                       spark_conf=None):
    """
    Create a SparkContext with Zoo configurations on a Yarn cluster in yarn-client mode.
    You should create a conda env and install the python dependencies in that env.
    The conda env and the python dependencies only need to be installed on the driver machine;
    it is not necessary to create and install them on the whole yarn cluster.

    :param hadoop_conf: The path to the yarn configuration folder.
    :param conda_name: The name of the conda env.
    :param num_executor: The number of Spark executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be '2g'.
    :param driver_memory: The memory for the Spark driver. Default to be '1g'.
    :param driver_cores: The number of cores for the Spark driver. Default to be 4.
    :param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
    :param extra_python_lib: Extra python files or packages needed for distribution.
           Default to be None.
    :param penv_archive: Ideally, the program would auto-pack the conda env specified by
           `conda_name`, but you can also pass the path to a packed file in "tar.gz" format here.
           Default to be None.
    :param hadoop_user_name: The user name for running the yarn cluster. Default to be 'root'.
    :param spark_yarn_archive: Conf value for setting spark.yarn.archive. Default to be None.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
    :param jars: Comma-separated list of jars to include on the driver and executor classpaths.
           Default to be None.
    :param spark_conf: You can append extra spark conf here in key-value format,
           e.g. spark_conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    sparkrunner = SparkRunner(spark_log_level=spark_log_level,
                              redirect_spark_log=redirect_spark_log)
    sc = sparkrunner.init_spark_on_yarn(
        hadoop_conf=hadoop_conf,
        conda_name=conda_name,
        num_executor=num_executor,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_memory=driver_memory,
        driver_cores=driver_cores,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        penv_archive=penv_archive,
        hadoop_user_name=hadoop_user_name,
        spark_yarn_archive=spark_yarn_archive,
        jars=jars,
        spark_conf=spark_conf)
    return sc
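
A minimal usage sketch for the yarn-client variant above (an illustration, not from the source): the conda env name "zoo-env" and the resource sizes are placeholders, and HADOOP_CONF_DIR is assumed to point to a valid yarn configuration folder.

import os

# Hypothetical call to init_spark_on_yarn defined above; all values are placeholders.
sc = init_spark_on_yarn(hadoop_conf=os.environ.get("HADOOP_CONF_DIR", "/etc/hadoop/conf"),
                        conda_name="zoo-env",
                        num_executor=2,
                        executor_cores=4,
                        executor_memory="8g",
                        driver_memory="2g",
                        spark_conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"})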
Example #4
def init_spark_on_local(cores=2,
                        conf=None,
                        python_location=None,
                        spark_log_level="WARN",
                        redirect_spark_log=True):
    """
    Create a SparkContext with Analytics Zoo configurations on the local machine.

    :param cores: The number of cores for Spark local. Default to be 2. You can also set it to "*"
           to use all the available cores, e.g. `init_spark_on_local(cores="*")`.
    :param conf: You can append extra conf for Spark in key-value format,
           e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.
    :param python_location: The path to your running Python executable. If not specified, the
           default Python interpreter in effect would be used.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    return runner.init_spark_on_local(cores=cores,
                                      conf=conf,
                                      python_location=python_location)
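
A minimal usage sketch for local mode (an illustration, not from the source): it only assumes init_spark_on_local above is in scope; the extra conf key is just an example of a Spark property.

# Run Spark locally on all available cores, do some work, then stop the context.
sc = init_spark_on_local(cores="*",
                         conf={"spark.driver.maxResultSize": "2g"})
rdd = sc.parallelize(range(100))
print(rdd.count())  # 100
sc.stop()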
Example #5
def init_spark_standalone(num_executors,
                          executor_cores,
                          executor_memory="2g",
                          driver_cores=4,
                          driver_memory="1g",
                          master=None,
                          extra_executor_memory_for_ray=None,
                          extra_python_lib=None,
                          spark_log_level="WARN",
                          redirect_spark_log=True,
                          conf=None,
                          jars=None):
    """
    Create a SparkContext with Analytics Zoo configurations on a Spark standalone cluster of
    a single node.
    By default, a new Spark standalone cluster would be started first and the SparkContext
    would use its master address. You need to call `stop_spark_standalone` after your program
    finishes to shut down the cluster.
    You can also specify `master` if you have already started a standalone cluster.

    :param num_executors: The number of Spark executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be '2g'.
    :param driver_cores: The number of cores for the Spark driver. Default to be 4.
    :param driver_memory: The memory for the Spark driver. Default to be '1g'.
    :param master: The master URL of an existing Spark standalone cluster: 'spark://master:port'.
           You only need to specify this if you have already started a standalone cluster.
           Default to be None and a new standalone cluster would be started in this case.
    :param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
    :param extra_python_lib: Extra python files or packages needed for distribution.
           Default to be None.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
    :param jars: Comma-separated list of jars to be included on driver and executor's classpath.
           Default to be None.
    :param conf: You can append extra conf for Spark in key-value format,
           e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    sc = runner.init_spark_standalone(
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_cores=driver_cores,
        driver_memory=driver_memory,
        master=master,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        conf=conf,
        jars=jars)
    return sc
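
A sketch of the typical lifecycle when no master is given (an illustration, not from the source): a standalone cluster is started on the current node, used, and then shut down via stop_spark_standalone (defined later in this section). Resource sizes are placeholders.

# Start a single-node standalone cluster, use the SparkContext, then tear the cluster down.
sc = init_spark_standalone(num_executors=1,
                           executor_cores=4,
                           executor_memory="4g")
try:
    print(sc.parallelize(range(10)).sum())  # 45
finally:
    sc.stop()
    stop_spark_standalone()  # shuts down the cluster started above (see Example #12)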
Example #6
def init_spark_on_k8s(master,
                      container_image,
                      num_executors,
                      executor_cores,
                      executor_memory="2g",
                      driver_memory="1g",
                      driver_cores=4,
                      extra_executor_memory_for_ray=None,
                      extra_python_lib=None,
                      spark_log_level="WARN",
                      redirect_spark_log=True,
                      jars=None,
                      conf=None,
                      python_location=None):
    """Returns the local TrainingOperator object.

       Be careful not to perturb its state, or else you can cause the system
       to enter an inconsistent state.
       
       Args:
            num_steps (int): Number of batches to compute update steps on
                per worker. This corresponds also to the number of times
                ``TrainingOperator.validate_batch`` is called per worker.
            profile (bool): Returns time stats for the evaluation procedure.
            reduce_results (bool): Whether to average all metrics across
                all workers into one dict. If a metric is a non-numerical
                value (or nested dictionaries), one value will be randomly
                selected among the workers. If False, returns a list of dicts.
            info (dict): Optional dictionary passed to the training
                operator for `validate` and `validate_batch`.
       
        Returns:
            TrainingOperator: The local TrainingOperator object.
    """

    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    sc = runner.init_spark_on_k8s(
        master=master,
        container_image=container_image,
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_memory=driver_memory,
        driver_cores=driver_cores,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        jars=jars,
        conf=conf,
        python_location=python_location)
    return sc
Example #7
def init_spark_on_local(cores=2, conf=None, python_location=None, spark_log_level="WARN",
                        redirect_spark_log=True):
    """Saves the Trainer state to the provided checkpoint path.

    Args:

        checkpoint (str): Path to target checkpoint file.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    return runner.init_spark_on_local(cores=cores, conf=conf,
                                      python_location=python_location)
Example #8
def init_spark_on_yarn(hadoop_conf,
                       conda_name,
                       num_executors,
                       executor_cores,
                       executor_memory="2g",
                       driver_cores=4,
                       driver_memory="1g",
                       extra_executor_memory_for_ray=None,
                       extra_python_lib=None,
                       penv_archive=None,
                       additional_archive=None,
                       hadoop_user_name="root",
                       spark_yarn_archive=None,
                       spark_log_level="WARN",
                       redirect_spark_log=True,
                       jars=None,
                       conf=None):
    """Returns the local TrainingOperator object.
    
       Be careful not to perturb its state, or else you can cause the system
       to enter an inconsistent state.
       
       Returns:
            TrainingOperator: The local TrainingOperator object.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    sc = runner.init_spark_on_yarn(
        hadoop_conf=hadoop_conf,
        conda_name=conda_name,
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_cores=driver_cores,
        driver_memory=driver_memory,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        penv_archive=penv_archive,
        additional_archive=additional_archive,
        hadoop_user_name=hadoop_user_name,
        spark_yarn_archive=spark_yarn_archive,
        jars=jars,
        conf=conf)
    return sc
Example #9
def init_spark_on_local(cores=2, conf=None, python_location=None, spark_log_level="WARN",
                        redirect_spark_log=True):
    """
    Create a SparkContext with Zoo configurations on the local machine.

    :param cores: The number of cores for Spark local. Default to be 2. You can also set it to "*"
           to use all the available cores, e.g. `init_spark_on_local(cores="*")`.
    :param conf: A key-value dictionary appended to SparkConf. Default to be None.
    :param python_location: The path to your running Python executable.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    sparkrunner = SparkRunner(spark_log_level=spark_log_level,
                              redirect_spark_log=redirect_spark_log)
    return sparkrunner.init_spark_on_local(cores=cores, conf=conf,
                                           python_location=python_location)
Example #10
def init_spark_standalone(num_executors,
                          executor_cores,
                          executor_memory="2g",
                          driver_cores=4,
                          driver_memory="1g",
                          master=None,
                          extra_executor_memory_for_ray=None,
                          extra_python_lib=None,
                          spark_log_level="WARN",
                          redirect_spark_log=True,
                          conf=None,
                          jars=None,
                          python_location=None,
                          enable_numa_binding=False):
    """Returns the local TrainingOperator object.

       Be careful not to perturb its state, or else you can cause the system
       to enter an inconsistent state.

       Returns:
            TrainingOperator: The local TrainingOperator object.
    """

    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    sc = runner.init_spark_standalone(
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_cores=driver_cores,
        driver_memory=driver_memory,
        master=master,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        conf=conf,
        jars=jars,
        python_location=python_location,
        enable_numa_binding=enable_numa_binding)
    return sc
Example #11
def init_spark_on_yarn(hadoop_conf,
                       conda_name,
                       num_executors,
                       executor_cores,
                       executor_memory="2g",
                       driver_cores=4,
                       driver_memory="2g",
                       extra_executor_memory_for_ray=None,
                       extra_python_lib=None,
                       penv_archive=None,
                       additional_archive=None,
                       hadoop_user_name="root",
                       spark_yarn_archive=None,
                       spark_log_level="WARN",
                       redirect_spark_log=True,
                       jars=None,
                       conf=None):
    """
    Create a SparkContext with Analytics Zoo configurations on Yarn cluster for yarn-client mode.
    You only need to create a conda environment and install the python dependencies in that
    environment beforehand on the driver machine. These dependencies would be automatically
    packaged and distributed to the whole Yarn cluster.

    :param hadoop_conf: The path to the yarn configuration folder.
    :param conda_name: The name of the conda environment.
    :param num_executors: The number of Spark executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be '2g'.
    :param driver_cores: The number of cores for the Spark driver. Default to be 4.
    :param driver_memory: The memory for the Spark driver. Default to be '2g'.
    :param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
    :param extra_python_lib: Extra python files or packages needed for distribution.
           Default to be None.
    :param penv_archive: Ideally, the program would auto-pack the conda environment specified by
           'conda_name', but you can also pass the path to a packed file in "tar.gz" format here.
           Default to be None.
    :param additional_archive: Comma-separated list of additional archives to be uploaded and
           unpacked on executors. Default to be None.
    :param hadoop_user_name: The user name for running the yarn cluster. Default to be 'root'.
    :param spark_yarn_archive: Conf value for setting spark.yarn.archive. Default to be None.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
    :param jars: Comma-separated list of jars to be included on driver and executor's classpath.
           Default to be None.
    :param conf: You can append extra conf for Spark in key-value format,
           e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    sc = runner.init_spark_on_yarn(
        hadoop_conf=hadoop_conf,
        conda_name=conda_name,
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_cores=driver_cores,
        driver_memory=driver_memory,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        penv_archive=penv_archive,
        additional_archive=additional_archive,
        hadoop_user_name=hadoop_user_name,
        spark_yarn_archive=spark_yarn_archive,
        jars=jars,
        conf=conf)
    return sc
Example #12
def stop_spark_standalone():
    """
    Stop the Spark standalone cluster created from init_spark_standalone (master not specified).
    """
    from zoo.util.spark import SparkRunner
    SparkRunner.stop_spark_standalone()
Example #13
def init_spark_standalone(num_executors,
                          executor_cores,
                          executor_memory="2g",
                          driver_cores=4,
                          driver_memory="2g",
                          master=None,
                          extra_executor_memory_for_ray=None,
                          extra_python_lib=None,
                          spark_log_level="WARN",
                          redirect_spark_log=True,
                          conf=None,
                          jars=None,
                          python_location=None,
                          enable_numa_binding=False):
    """
    Create a SparkContext with Analytics Zoo configurations on Spark standalone cluster.

    You need to specify master if you already have a Spark standalone cluster. For a
    standalone cluster with multiple nodes, make sure that analytics-zoo is installed via
    pip in the Python environment on every node.
    If master is not specified, a new Spark standalone cluster on the current single node
    would be started first and the SparkContext would use its master address. You need to
    call `stop_spark_standalone` after your program finishes to shutdown the cluster.

    :param num_executors: The number of Spark executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be '2g'.
    :param driver_cores: The number of cores for the Spark driver. Default to be 4.
    :param driver_memory: The memory for the Spark driver. Default to be '2g'.
    :param master: The master URL of an existing Spark standalone cluster: 'spark://master:port'.
           You only need to specify this if you have already started a standalone cluster.
           Default to be None and a new standalone cluster would be started in this case.
    :param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
    :param extra_python_lib: Extra python files or packages needed for distribution.
           Default to be None.
    :param spark_log_level: The log level for Spark. Default to be 'WARN'.
    :param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
    :param jars: Comma-separated list of jars to be included on driver and executor's classpath.
           Default to be None.
    :param conf: You can append extra conf for Spark in key-value format,
           e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
           Default to be None.
    :param python_location: The path to your running Python executable. If not specified, the
           default Python interpreter in effect would be used.
    :param enable_numa_binding: Whether to use numactl to start the spark worker in order to bind
           different worker processes to different cpus and memory areas. This may lead to
           better performance on a multi-socket machine. Default to be False.

    :return: An instance of SparkContext.
    """
    from zoo.util.spark import SparkRunner
    runner = SparkRunner(spark_log_level=spark_log_level,
                         redirect_spark_log=redirect_spark_log)
    set_python_home()
    sc = runner.init_spark_standalone(
        num_executors=num_executors,
        executor_cores=executor_cores,
        executor_memory=executor_memory,
        driver_cores=driver_cores,
        driver_memory=driver_memory,
        master=master,
        extra_executor_memory_for_ray=extra_executor_memory_for_ray,
        extra_python_lib=extra_python_lib,
        conf=conf,
        jars=jars,
        python_location=python_location,
        enable_numa_binding=enable_numa_binding)
    return sc
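
For completeness, a sketch of attaching to an already-running standalone cluster instead of starting one (an illustration, not from the source): the master URL and resource sizes are placeholders, and enable_numa_binding is left at its default.

# Hypothetical: reuse an existing standalone cluster by passing its master URL;
# stop_spark_standalone is not needed here because this call did not start the cluster.
sc = init_spark_standalone(num_executors=2,
                           executor_cores=8,
                           executor_memory="10g",
                           master="spark://master-host:7077")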
Example #14
def stop_spark_standalone():
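    """
    Stop the Spark standalone cluster created from init_spark_standalone (master not specified).
    """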
    from zoo.util.spark import SparkRunner
    SparkRunner.stop_spark_standalone()