Exemplo n.º 1
0
def make_spark_distribution(
        commit_id, target_dir, spark_git_repo,
        merge_commit_into_master=False, is_yarn_mode=False,
        additional_make_distribution_args=""):
    """
    Download Spark, check out a specific version, and create a binary distribution.

    :param commit_id: the version to build.  Can specify any of the following:
        1. A git commit hash         e.g. "4af93ff3"
        2. A branch name             e.g. "origin/branch-0.7"
        3. A tag name                e.g. "origin/tag/v0.8.0-incubating"
        4. A pull request            e.g. "origin/pr/675"
    :param target_dir: the directory to clone Spark into.
    :param spark_git_repo: the repo to clone from.  By default, this is the Spark GitHub mirror.
    :param merge_commit_into_master: if True, this commit_id will be merged into `master`; this can
                                     be useful for testing un-merged pull requests.
    :param is_yarn_mode: if True, build with the `-Pyarn` profile enabled.
    :param additional_make_distribution_args: extra arguments appended verbatim to the
                                              make-distribution.sh invocation.
    """
    clone_spark(target_dir, spark_git_repo)
    checkout_version(target_dir, commit_id, merge_commit_into_master)
    with cd(target_dir):
        logger.info("Building spark at version %s; This may take a while...\n" % commit_id)
        # According to the SPARK-1520 JIRA, building with Java 7+ will only cause problems when
        # running PySpark on YARN or when running on Java 6.  Since we'll be building and running
        # Spark on the same machines and using standalone mode, it should be safe to
        # disable this warning:
        yarn_flag = "-Pyarn " if is_yarn_mode else ""
        run_cmd("./make-distribution.sh " + yarn_flag + additional_make_distribution_args)
Exemplo n.º 2
0
def make_spark_distribution(commit_id,
                            target_dir,
                            spark_git_repo,
                            merge_commit_into_master=False,
                            is_yarn_mode=False,
                            additional_make_distribution_args=""):
    """
    Download Spark, check out a specific version, and create a binary distribution.

    :param commit_id: the version to build.  Can specify any of the following:
        1. A git commit hash         e.g. "4af93ff3"
        2. A branch name             e.g. "origin/branch-0.7"
        3. A tag name                e.g. "origin/tag/v0.8.0-incubating"
        4. A pull request            e.g. "origin/pr/675"
    :param target_dir: the directory to clone Spark into.
    :param spark_git_repo: the repo to clone from.  By default, this is the Spark GitHub mirror.
    :param merge_commit_into_master: if True, this commit_id will be merged into `master`; this can
                                     be useful for testing un-merged pull requests.
    :param is_yarn_mode: if True, build with the `-Pyarn` profile enabled.
    :param additional_make_distribution_args: extra arguments appended verbatim to the
                                              make-distribution.sh invocation.
    """
    clone_spark(target_dir, spark_git_repo)
    checkout_version(target_dir, commit_id, merge_commit_into_master)
    with cd(target_dir):
        logger.info(
            "Building spark at version %s; This may take a while...\n" %
            commit_id)
        # According to the SPARK-1520 JIRA, building with Java 7+ will only cause problems when
        # running PySpark on YARN or when running on Java 6.  Since we'll be building and running
        # Spark on the same machines and using standalone mode, it should be safe to
        # disable this warning (--skip-java-test suppresses the Java-version prompt):
        profile_flag = "-Pyarn " if is_yarn_mode else ""
        run_cmd("./make-distribution.sh --skip-java-test " + profile_flag +
                additional_make_distribution_args)
Exemplo n.º 3
0
def checkout_version(repo_dir, commit_id, merge_commit_into_master=False):
    """
    Fetch updates in an existing Spark clone and check out the requested version.

    :param repo_dir: path to an existing git clone of Spark.
    :param commit_id: commit hash, branch, tag, or pull-request ref to check out.
    :param merge_commit_into_master: if True, reset to `master` and merge commit_id
                                     into it (useful for testing un-merged PRs);
                                     otherwise hard-reset directly to commit_id.
    """
    with cd(repo_dir):
        # Fetch updates
        logger.info("Updating Spark repo...")
        run_cmd("git fetch")

        # Check out the requested commit / branch / PR.  `git clean -f -d -x` removes
        # untracked files, directories, and ignored build artifacts for a pristine tree.
        logger.info("Cleaning Spark and checking out commit_id %s." % commit_id)
        run_cmd("git clean -f -d -x")

        if merge_commit_into_master:
            run_cmd("git reset --hard master")
            # Fixed: the original passed `-m ='Merging ...'`, which prefixed the
            # commit message with a stray '=' character.
            run_cmd("git merge %s -m 'Merging %s into master.'" %
                    (commit_id, commit_id))
        else:
            run_cmd("git reset --hard %s" % commit_id)
Exemplo n.º 4
0
def checkout_version(repo_dir, commit_id, merge_commit_into_master=False):
    """
    Fetch updates in an existing Spark clone and check out the requested version.

    :param repo_dir: path to an existing git clone of Spark.
    :param commit_id: commit hash, branch, tag, or pull-request ref to check out.
    :param merge_commit_into_master: if True, reset to `master` and merge commit_id
                                     into it (useful for testing un-merged PRs);
                                     otherwise hard-reset directly to commit_id.
    """
    with cd(repo_dir):
        # Fetch updates
        logger.info("Updating Spark repo...")
        run_cmd("git fetch")

        # Check out the requested commit / branch / PR.  `git clean -f -d -x` removes
        # untracked files, directories, and ignored build artifacts for a pristine tree.
        logger.info("Cleaning Spark and checking out commit_id %s." % commit_id)
        run_cmd("git clean -f -d -x")

        if merge_commit_into_master:
            run_cmd("git reset --hard master")
            # Fixed: the original passed `-m ='Merging ...'`, which prefixed the
            # commit message with a stray '=' character.
            run_cmd("git merge %s -m 'Merging %s into master.'" %
                    (commit_id, commit_id))
        else:
            run_cmd("git reset --hard %s" % commit_id)