Exemplo n.º 1
0
def submit_application():
    """Submit the uploaded Spark application (JAR or Python file) via spark-submit.

    Optional form fields:
      - job_settings: extra spark-submit options (whitespace-separated)
      - job_args: arguments forwarded to the application itself
      - main_class: when present the upload is treated as a JAR (shipped
        with --jars); otherwise as a Python file (shipped with --py-files)

    Returns the job id on success, or ("Failed!", 500) on any error.
    """
    try:
        import subprocess

        job_id = get_job_id_from_attached_file()
        job_settings = request.form.get('job_settings', "").split()
        job_args = request.form.get('job_args', "").split()
        main_class = request.form.get('main_class', None)
        spark_dir = utils.get_os_env('SPARK_DIR')
        # The uploaded artifact location; shipped to the cluster AND passed
        # as the application file.
        job_file = "/tmp/" + job_id

        if main_class is not None:
            cmd = [spark_dir + "/bin/spark-submit",
                   "--class", main_class,
                   "--master", get_master_uri(),
                   "--jars", job_file]
        else:
            cmd = [spark_dir + "/bin/spark-submit",
                   "--master", get_master_uri(),
                   "--py-files", job_file]
        job_watchers[job_id] = subprocess.Popen(cmd + job_settings + [job_file] + job_args,
                                                universal_newlines=True,
                                                stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; callers still get a plain 500 on failure.
        return "Failed!", 500
Exemplo n.º 2
0
def submit_application():
    """Submit the uploaded Spark application (JAR or Python file) via spark-submit.

    Optional form fields:
      - job_settings: extra spark-submit options (whitespace-separated)
      - job_args: arguments forwarded to the application itself
      - main_class: when present the upload is treated as a JAR (shipped
        with --jars); otherwise as a Python file (shipped with --py-files)

    Returns the job id on success, or ("Failed!", 500) on any error.
    """
    try:
        import subprocess

        job_id = get_job_id_from_attached_file()
        job_settings = request.form.get('job_settings', "").split()
        job_args = request.form.get('job_args', "").split()
        main_class = request.form.get('main_class', None)
        spark_dir = utils.get_os_env('SPARK_DIR')
        # Uploaded artifact location; shipped to the cluster and also passed
        # as the application file argument.
        job_file = "/tmp/" + job_id

        if main_class is not None:
            submit_cmd = [spark_dir + "/bin/spark-submit",
                          "--class", main_class,
                          "--master", get_master_uri(),
                          "--jars", job_file]
        else:
            submit_cmd = [spark_dir + "/bin/spark-submit",
                          "--master", get_master_uri(),
                          "--py-files", job_file]
        job_watchers[job_id] = subprocess.Popen(
            submit_cmd + job_settings + [job_file] + job_args,
            universal_newlines=True,
            stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; callers still get a plain 500 on failure.
        return "Failed!", 500
Exemplo n.º 3
0
def create_worker_process(masterUri):
    """Start the Spark worker daemon (when enabled) and tail its log file."""
    global start_worker
    global log_watchers
    if start_worker != 'true':
        return

    if utils.get_os_env('START_MASTER').lower() == 'true':
        logging.info("Spark master daemon running on port 7077, worker bind to random port.")
    else:
        os.environ['SPARK_WORKER_PORT'] = "7077"
        logging.info("Spark master daemon not started, worker daemon will bind to port 7077.")

    # The last whitespace-separated token of start-slave.sh's output is
    # taken as the path of the worker's log file.
    startup_output = subprocess.check_output([spark_dir + "/sbin/start-slave.sh", masterUri],
                                             universal_newlines=True)
    log_path = startup_output.rsplit(None, 1)[-1]
    log_watchers['Worker'] = subprocess.Popen(["tail", "-f", log_path])
Exemplo n.º 4
0
def send_query():
    """Run the uploaded HiveQL file through beeline against the Thrift server.

    Optional form fields:
      - hive_vars / hive_confs: whitespace-separated items forwarded as
        repeated --hivevar / --hiveconf options
      - username: defaults to "<app-id>-<app-version>-<hostname>"
      - password: only passed (-p) when non-empty

    Returns the job id on success, or ("Failed!", 500) on any error.
    """
    try:
        import subprocess

        job_id = get_job_id_from_attached_file()

        # Expand each item into its own --hivevar/--hiveconf option pair.
        # (Iterating an empty split() result is a no-op, so no guard needed.)
        arg_hivevar = []
        for hive_var in request.form.get('hive_vars', "").split():
            arg_hivevar += ['--hivevar', hive_var]

        arg_hiveconf = []
        for hive_conf in request.form.get('hive_confs', "").split():
            arg_hiveconf += ['--hiveconf', hive_conf]

        application_id = utils.get_os_env('APPLICATION_ID')
        application_version = utils.get_os_env('APPLICATION_VERSION')
        hostname = utils.get_os_env('HOSTNAME')
        default_username = application_id + "-" + application_version + "-" + hostname

        username = request.form.get('username', default_username)
        arg_username = ['-n', username]

        password = request.form.get('password', "")
        # Only add -p when a password was actually supplied.
        arg_password = ['-p', password] if password else []

        job_watchers[job_id] = subprocess.Popen(
            ["/opt/beeline/beeline", "-u",
             get_thrift_server_uri()] + arg_hivevar + arg_hiveconf +
            arg_username + arg_password + ["-f", "/tmp/" + job_id],
            universal_newlines=True,
            stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; callers still get a plain 500 on failure.
        return "Failed!", 500
Exemplo n.º 5
0
def send_query():
    """Run the uploaded HiveQL file through beeline against the Thrift server.

    Optional form fields:
      - hive_vars / hive_confs: whitespace-separated items forwarded as
        repeated --hivevar / --hiveconf options
      - username: defaults to "<app-id>-<app-version>-<hostname>"
      - password: only passed (-p) when non-empty

    Returns the job id on success, or ("Failed!", 500) on any error.
    """
    try:
        import subprocess

        job_id = get_job_id_from_attached_file()

        # Expand each item into its own --hivevar/--hiveconf option pair.
        # (Iterating an empty split() result is a no-op, so no guard needed.)
        arg_hivevar = []
        for hive_var in request.form.get('hive_vars', "").split():
            arg_hivevar += ['--hivevar', hive_var]

        arg_hiveconf = []
        for hive_conf in request.form.get('hive_confs', "").split():
            arg_hiveconf += ['--hiveconf', hive_conf]

        application_id = utils.get_os_env('APPLICATION_ID')
        application_version = utils.get_os_env('APPLICATION_VERSION')
        hostname = utils.get_os_env('HOSTNAME')
        default_username = application_id + "-" + application_version + "-" + hostname

        username = request.form.get('username', default_username)
        arg_username = ['-n', username]

        password = request.form.get('password', "")
        # Only add -p when a password was actually supplied.
        arg_password = ['-p', password] if password else []

        job_watchers[job_id] = subprocess.Popen(["/opt/beeline/beeline", "-u", get_thrift_server_uri()] +
                                                arg_hivevar + arg_hiveconf + arg_username + arg_password +
                                                ["-f", "/tmp/" + job_id],
                                                universal_newlines=True,
                                                stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; callers still get a plain 500 on failure.
        return "Failed!", 500
Exemplo n.º 6
0
def create_worker_process(masterUri):
    """Start the Spark worker daemon (when enabled) and tail its log file."""
    global start_worker
    global log_watchers
    if start_worker != 'true':
        return

    master_running = utils.get_os_env('START_MASTER').lower() == 'true'
    if master_running:
        logging.info(
            "Spark master daemon running on port 7077, worker bind to random port."
        )
    else:
        os.environ['SPARK_WORKER_PORT'] = "7077"
        logging.info(
            "Spark master daemon not started, worker daemon will bind to port 7077."
        )

    # start-slave.sh's output is split on whitespace; its last token is
    # treated as the worker log file path to tail.
    script_output = subprocess.check_output(
        [spark_dir + "/sbin/start-slave.sh", masterUri],
        universal_newlines=True)
    log_watchers['Worker'] = subprocess.Popen(
        ["tail", "-f", script_output.rsplit(None, 1)[-1]])
Exemplo n.º 7
0
#!/usr/bin/env python3

import os
import boto3
import logging
import subprocess
import utils
from time import sleep

# Module-level startup: configure logging, resolve the Spark install dir,
# and prepare ZooKeeper HA / Hive settings from the environment.
logging.basicConfig(level=getattr(logging, 'INFO', None))

spark_dir = utils.get_os_env('SPARK_DIR')

utils.set_ec2_identities()

# ZooKeeper-backed HA mode: enabled only when a ZooKeeper stack is configured.
zk_conn_str = ""
if utils.get_os_env('ZOOKEEPER_STACK_NAME') != "":
    zk_conn_str = utils.generate_zk_conn_str()
    # Spark picks up recovery mode and the ZK URL from SPARK_DAEMON_JAVA_OPTS.
    os.environ['SPARK_DAEMON_JAVA_OPTS'] = "-Dspark.deploy.recoveryMode=ZOOKEEPER " \
                                           "-Dspark.deploy.zookeeper.url=" + zk_conn_str
    logging.info("HA mode enabled with ZooKeeper connection string " + zk_conn_str)

# Exported even when empty so downstream consumers see a defined value.
os.environ['ZOOKEEPER_CONN_STR'] = zk_conn_str

if utils.get_os_env('HIVE_SITE_XML') != "":
    hive_site_xml = utils.get_os_env('HIVE_SITE_XML')
    # Strips the first 5 characters — presumably an "s3://" scheme prefix;
    # TODO(review): confirm HIVE_SITE_XML is always an s3:// URL.
    path = hive_site_xml[5:]
    bucket = path[:path.find('/')]          # text before the first '/'
    file_key = path[path.find('/')+1:]      # remainder after the first '/'
    s3 = boto3.resource('s3')
    try:
Exemplo n.º 8
0
    return retval


def get_job_output(job_id):
    """Return a (body, status) pair describing a previously submitted job.

    404 while the job is still running or when the id is unknown,
    200 with the captured stdout on success,
    500 with the captured output when the job exited non-zero.
    """
    if job_id in job_watchers:
        # poll() returns None while running, else the process exit code.
        status = job_watchers[job_id].poll()
        if status is None:
            return "Job is still running, try again later.", 404
        elif status == 0:
            return get_output_stream(job_watchers[job_id]), 200
        else:
            return "Job failed!\n" + get_output_stream(
                job_watchers[job_id]), 500
    else:
        # Fixed typo in the error message (was "ID ot found!").
        return "ID not found!", 404


logging.basicConfig(level=getattr(logging, 'INFO', None))

# OAuth2 settings injected into the swagger spec as template arguments.
api_args = {
    'auth_url': utils.get_os_env('AUTH_URL'),
    'tokeninfo_url': utils.get_os_env('TOKENINFO_URL'),
    'oauth2_scope': utils.get_os_env('OAUTH2_SCOPE')
}
# connexion wraps a Flask app and wires routes from the swagger spec.
webapp = connexion.App(__name__, port=8000, debug=True, server='gevent')
webapp.add_api('swagger.yaml', arguments=api_args)
# Expose the underlying Flask app for WSGI servers.
application = webapp.app

if __name__ == '__main__':
    webapp.run()
Exemplo n.º 9
0
#!/usr/bin/env python3

import os
from boto3.s3.transfer import S3Transfer
import boto3
import logging
import subprocess
import utils
from time import sleep

# Module-level startup: configure logging, resolve the Spark install dir,
# install extra Python libraries, and prepare ZooKeeper HA settings.
logging.basicConfig(level=getattr(logging, 'INFO', None))

spark_dir = utils.get_os_env('SPARK_DIR')

# Extra pip packages requested via environment (comma-separated).
# NOTE(review): Popen is fire-and-forget here — the install runs in the
# background and its success is not checked.
if utils.get_os_env('PYTHON_LIBS') != "":
    python_libs = utils.get_os_env('PYTHON_LIBS').split(',')
    subprocess.Popen(["pip3", "install", "--upgrade"] + python_libs)

utils.set_ec2_identities()

# ZooKeeper-backed HA mode: enabled only when a ZooKeeper stack is configured.
zk_conn_str = ""
if utils.get_os_env('ZOOKEEPER_STACK_NAME') != "":
    zk_conn_str = utils.generate_zk_conn_str()
    # Spark picks up recovery mode and the ZK URL from SPARK_DAEMON_JAVA_OPTS.
    os.environ['SPARK_DAEMON_JAVA_OPTS'] = "-Dspark.deploy.recoveryMode=ZOOKEEPER " \
                                           "-Dspark.deploy.zookeeper.url=" + zk_conn_str
    logging.info("HA mode enabled with ZooKeeper connection string " +
                 zk_conn_str)

# Exported even when empty so downstream consumers see a defined value.
os.environ['ZOOKEEPER_CONN_STR'] = zk_conn_str

if utils.get_os_env('HIVE_SITE_XML') != "":
Exemplo n.º 10
0
def get_output_stream(proc):
    """Read *proc*'s stdout to EOF and return it as one string, with
    trailing whitespace stripped from each line and every line
    re-terminated with "\n".
    """
    # str.join avoids the quadratic cost of repeated string `+=` in a loop.
    return "".join(line.rstrip() + "\n"
                   for line in iter(proc.stdout.readline, ''))


def get_job_output(job_id):
    """Return a (body, status) pair describing a previously submitted job.

    404 while the job is still running or when the id is unknown,
    200 with the captured stdout on success,
    500 with the captured output when the job exited non-zero.
    """
    if job_id in job_watchers:
        # poll() returns None while running, else the process exit code.
        status = job_watchers[job_id].poll()
        if status is None:
            return "Job is still running, try again later.", 404
        elif status == 0:
            return get_output_stream(job_watchers[job_id]), 200
        else:
            return "Job failed!\n" + get_output_stream(job_watchers[job_id]), 500
    else:
        # Fixed typo in the error message (was "ID ot found!").
        return "ID not found!", 404

logging.basicConfig(level=getattr(logging, 'INFO', None))

# OAuth2 settings injected into the swagger spec as template arguments.
api_args = {'auth_url': utils.get_os_env('AUTH_URL'),
            'tokeninfo_url': utils.get_os_env('TOKENINFO_URL'),
            'oauth2_scope': utils.get_os_env('OAUTH2_SCOPE')}
# connexion wraps a Flask app and wires routes from the swagger spec.
webapp = connexion.App(__name__, port=8000, debug=True, server='gevent')
webapp.add_api('swagger.yaml', arguments=api_args)
# Expose the underlying Flask app for WSGI servers.
application = webapp.app

if __name__ == '__main__':
    webapp.run()