def download_and_build_spark_cassandra_stress(node):
    dse_home = "DSE_HOME={dse_path}".format(dse_path=dse.get_dse_path())
    dse_resources = "DSE_RESOURCES={dse_resources_path}".format(
        dse_resources_path=os.path.join(dse.get_dse_path(), "resources")
    )
    build_command = "./gradlew jar -Pagainst=dse;"
    with common.fab.settings(hosts=node):
        execute(
            fab.run,
            "rm -rf {spark_cass_stress_path}".format(spark_cass_stress_path=get_spark_cassandra_stress_path())
        )
        execute(
            fab.run,
            "git clone -b master --single-branch https://github.com/datastax/spark-cassandra-stress.git {spark_cass_stress_path}".format(
                spark_cass_stress_path=get_spark_cassandra_stress_path()
            ),
        )
        return execute(
            fab.run,
            "cd {spark_cass_stress_path}; TERM=dumb {dse_home} {dse_resources} {build_cmd}".format(
                spark_cass_stress_path=get_spark_cassandra_stress_path(),
                dse_home=dse_home,
                dse_resources=dse_resources,
                build_cmd=build_command,
            ),
        )
def spark_cassandra_stress(script, node):
    download_and_build_spark_cassandra_stress(node)
    dse_bin = os.path.join(dse.get_dse_path(), "bin")
    cmd = "cd {spark_cass_stress_path}; PATH=$PATH:{dse_bin} JAVA_HOME={JAVA_HOME} DSE_HOME={dse_home} ./run.sh dse {script}".format(
        JAVA_HOME=JAVA_HOME,
        spark_cass_stress_path=get_spark_cassandra_stress_path(),
        script=script,
        dse_bin=dse_bin,
        dse_home=dse.get_dse_path(),
    )
    with common.fab.settings(fab.show("warnings", "running", "stdout", "stderr"), hosts=node):
        execute(fab.sudo, "rm -rf /var/lib/spark")
        execute(fab.sudo, "mkdir -p /var/lib/spark")
        execute(fab.sudo, "chmod -R 777 /var/lib/spark")
        return execute(fab.run, cmd)
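# Hypothetical usage sketch (an assumption, not code from cstar_perf): shows the call
# shape of spark_cassandra_stress() above. The workload string and node address are
# made-up placeholders for whatever the benchmark configuration supplies.
def _example_spark_stress_smoke_test():
    example_script = 'writeperfrow num-total-ops=1000000'  # assumed Spark-Cassandra-Stress workload args
    example_node = '10.200.1.1'                            # assumed DSE node running Spark
    return spark_cassandra_stress(example_script, example_node)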
def download_and_build_spark_cassandra_stress(stress_node=None):
    """Clone and build Spark-Cassandra-Stress, either on a dedicated stress node
    over fabric or locally via subprocess when no stress node is configured."""
    dse_home = 'DSE_HOME={dse_path}'.format(dse_path=dse.get_dse_path())
    dse_resources = 'DSE_RESOURCES={dse_resources_path}'.format(dse_resources_path=os.path.join(dse.get_dse_path(), 'resources'))
    spark_cassandra_stress_git = 'https://github.com/datastax/spark-cassandra-stress.git'
    git_clone_spark_cass_stress_command = 'git clone -b master --single-branch ' \
                                          '{spark_cass_stress_git} ' \
                                          '{spark_cass_stress_path}'.format(spark_cass_stress_git=spark_cassandra_stress_git,
                                                                            spark_cass_stress_path=get_spark_cassandra_stress_path(stress_node=stress_node))
    build_command = './gradlew jar -Pagainst=dse;'
    full_build_command = 'cd {spark_cass_stress_path}; TERM=dumb {dse_home} {dse_resources} {build_cmd}'.format(
        spark_cass_stress_path=get_spark_cassandra_stress_path(),
        dse_home=dse_home,
        dse_resources=dse_resources,
        build_cmd=build_command
    )

    if stress_node:
        with common.fab.settings(hosts=stress_node):
            execute(fab.run, 'rm -rf {spark_cass_stress_path}'.format(spark_cass_stress_path=get_spark_cassandra_stress_path(stress_node=stress_node)))
            execute(fab.run, git_clone_spark_cass_stress_command)
            execute(fab.run, full_build_command)
    else:
        shutil.rmtree(get_spark_cassandra_stress_path(), ignore_errors=True)
        logger.info('Installing Spark-Cassandra-Stress from {spark_cass_stress_git}'.format(spark_cass_stress_git=spark_cassandra_stress_git))
        proc = subprocess.Popen(git_clone_spark_cass_stress_command, shell=True)
        proc.wait()
        assert proc.returncode == 0, 'Installing Spark-Cassandra-Stress from {spark_cass_stress_git} ' \
                                     'did not complete successfully'.format(spark_cass_stress_git=spark_cassandra_stress_git)
        logger.info('Building Spark-Cassandra-Stress using {full_build_command}'.format(full_build_command=full_build_command))
        proc = subprocess.Popen(full_build_command, shell=True)
        proc.wait()
        assert proc.returncode == 0, 'Building Spark-Cassandra-Stress using {full_build_command} ' \
                                     'did not complete successfully'.format(full_build_command=full_build_command)
def get_spark_cassandra_stress_command(script, node, master, stress_node=None):
    """Assemble the shell command that launches a Spark-Cassandra-Stress run
    against `node`, using `master` as the Spark master URL."""
    dse_bin = os.path.join(dse.get_dse_path(), 'bin')
    # See the conversation on https://github.com/datastax/cstar_perf/pull/226 for why we pass SPARK_MASTER.
    # tl;dr: on DSE 4.7.x the dse script tries to call dsetool on the spark-cassandra-stress node
    # if the SPARK_MASTER env var is not set, which results in a connection error trace because
    # we do not start DSE on the spark-cassandra-stress node.
    spark_cassandra_stress_cmd_prefix = 'cd {spark_cass_stress_path}; ' \
                                        'PATH=$PATH:{dse_bin} ' \
                                        'JAVA_HOME={JAVA_HOME} ' \
                                        'DSE_HOME={dse_home} ' \
                                        'SPARK_MASTER={master} '.format(spark_cass_stress_path=get_spark_cassandra_stress_path(stress_node=stress_node),
                                                                        dse_bin=dse_bin,
                                                                        JAVA_HOME=JAVA_HOME,
                                                                        dse_home=dse.get_dse_path(),
                                                                        master=master)
    spark_cass_connection_host_arg = ' --conf spark.cassandra.connection.host={node}'.format(node=node)
    spark_cassandra_run_cmd = './run.sh dse {script} {master} {connection_host}'.format(script=script,
                                                                                        master=master,
                                                                                        connection_host=spark_cass_connection_host_arg)
    cmd = spark_cassandra_stress_cmd_prefix + ' ' + spark_cassandra_run_cmd
    return cmd
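# Hypothetical glue sketch (an assumption, not the actual cstar_perf entry point):
# ties the helpers in this revision together, mirroring the remote/local split used
# in download_and_build_spark_cassandra_stress() above.
def _example_run_spark_cassandra_stress(script, node, master, stress_node=None):
    download_and_build_spark_cassandra_stress(stress_node=stress_node)
    cmd = get_spark_cassandra_stress_command(script, node, master, stress_node=stress_node)
    if stress_node:
        # Dedicated stress node configured: run the assembled command there over fabric.
        with common.fab.settings(fab.show('warnings', 'running', 'stdout', 'stderr'), hosts=stress_node):
            return execute(fab.run, cmd)
    # No dedicated stress node: run the command locally, as the local build path above does.
    logger.info('Running Spark-Cassandra-Stress using {cmd}'.format(cmd=cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.wait()
    assert proc.returncode == 0, 'Spark-Cassandra-Stress run did not complete successfully'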