def get_local_run_environment_vars(instance_config, port0, framework):
    """Return a dict of environment variables simulating what would be
    available to a paasta service running in a container.

    :param instance_config: config object exposing get_docker_image(),
        get_container_port(), get_cpus(), get_mem(), get_disk()
    :param port0: host port that will be mapped into the container
    :param framework: scheduler to emulate; "marathon" adds MARATHON_* vars
    """
    fqdn = socket.getfqdn()
    image = instance_config.get_docker_image()
    if image == "":
        # In a local_run environment the docker_image may not be available,
        # so fall back to the injected DOCKER_TAG per the paasta contract.
        image = os.environ["DOCKER_TAG"]
    task_id = uuid.uuid4()
    env = {
        "HOST": fqdn,
        "MESOS_SANDBOX": "/mnt/mesos/sandbox",
        "MESOS_CONTAINER_NAME": "localrun-%s" % task_id,
        "MESOS_TASK_ID": str(task_id),
        "PAASTA_DOCKER_IMAGE": image,
        "PAASTA_LAUNCHED_BY": get_possible_launched_by_user_variable_from_env(),
    }
    if framework == "marathon":
        port_str = str(port0)
        env.update(
            {
                "MARATHON_PORT": port_str,
                "MARATHON_PORT0": port_str,
                "MARATHON_PORTS": port_str,
                "MARATHON_PORT_%d" % instance_config.get_container_port(): port_str,
                "MARATHON_APP_VERSION": "simulated_marathon_app_version",
                "MARATHON_APP_RESOURCE_CPUS": str(instance_config.get_cpus()),
                "MARATHON_APP_DOCKER_IMAGE": image,
                "MARATHON_APP_RESOURCE_MEM": str(instance_config.get_mem()),
                "MARATHON_APP_RESOURCE_DISK": str(instance_config.get_disk()),
                "MARATHON_APP_LABELS": "",
                "MARATHON_APP_ID": "/simulated_marathon_app_id",
                "MARATHON_HOST": fqdn,
                # NOTE(review): original indentation was lost; PAASTA_HOST is
                # assumed to belong with the marathon vars — confirm upstream.
                "PAASTA_HOST": fqdn,
            }
        )
    return env
def get_spark_env(
    args: argparse.Namespace,
    spark_conf_str: str,
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
    ui_port: str,
) -> Dict[str, str]:
    """Create the env config dict to configure on the docker container"""
    env: Dict[str, str] = {}

    if not args.disable_aws_credential_env_variables:
        access_key, secret_key, session_token = aws_creds
        # Only export credentials that were actually resolved.
        if access_key:
            env["AWS_ACCESS_KEY_ID"] = access_key
            env["AWS_SECRET_ACCESS_KEY"] = secret_key
            if session_token is not None:
                env["AWS_SESSION_TOKEN"] = session_token

    env["AWS_DEFAULT_REGION"] = args.aws_region
    env["PAASTA_LAUNCHED_BY"] = get_possible_launched_by_user_variable_from_env()
    env["PAASTA_INSTANCE_TYPE"] = "spark"

    # Run spark (and mesos framework) as root.
    env["SPARK_USER"] = "******"
    env["SPARK_OPTS"] = spark_conf_str

    if args.cmd == "jupyter-lab":
        # Default configs to start the jupyter notebook server
        jupyter_dir = "/source/.jupyter"
        env["JUPYTER_RUNTIME_DIR"] = jupyter_dir
        env["JUPYTER_DATA_DIR"] = jupyter_dir
        env["JUPYTER_CONFIG_DIR"] = jupyter_dir
    elif args.cmd == "history-server":
        # work_dir is a host:container mapping; the container half holds logs.
        log_dir = args.work_dir.split(":")[1]
        env["SPARK_LOG_DIR"] = log_dir
        if not args.spark_args or not args.spark_args.startswith(
            "spark.history.fs.logDirectory"
        ):
            print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        env["SPARK_HISTORY_OPTS"] = (
            f"-D{args.spark_args} -Dspark.history.ui.port={ui_port}"
        )
        env["SPARK_DAEMON_CLASSPATH"] = "/opt/spark/extra_jars/*"
        env["SPARK_NO_DAEMONIZE"] = "true"
    return env
def get_local_run_environment_vars(instance_config, port0, framework):
    """Return environment variables simulating a paasta service in a container.

    Supports the 'marathon' and 'chronos' frameworks, adding the
    framework-specific variables each scheduler would inject.
    """
    fqdn = socket.getfqdn()
    image = instance_config.get_docker_image()
    if image == '':
        # docker_image may be unavailable in a local_run environment; fall
        # back to the injected DOCKER_TAG per the paasta contract.
        image = os.environ['DOCKER_TAG']
    task_id = uuid.uuid4()
    env = {
        'HOST': fqdn,
        'MESOS_SANDBOX': '/mnt/mesos/sandbox',
        'MESOS_CONTAINER_NAME': 'localrun-%s' % task_id,
        'MESOS_TASK_ID': str(task_id),
        'PAASTA_DOCKER_IMAGE': image,
        'PAASTA_LAUNCHED_BY': get_possible_launched_by_user_variable_from_env(),
    }
    if framework == 'marathon':
        port_str = str(port0)
        env.update({
            'MARATHON_PORT': port_str,
            'MARATHON_PORT0': port_str,
            'MARATHON_PORTS': port_str,
            'MARATHON_PORT_%d' % instance_config.get_container_port(): port_str,
            'MARATHON_APP_VERSION': 'simulated_marathon_app_version',
            'MARATHON_APP_RESOURCE_CPUS': str(instance_config.get_cpus()),
            'MARATHON_APP_DOCKER_IMAGE': image,
            'MARATHON_APP_RESOURCE_MEM': str(instance_config.get_mem()),
            'MARATHON_APP_RESOURCE_DISK': str(instance_config.get_disk()),
            'MARATHON_APP_LABELS': "",
            'MARATHON_APP_ID': '/simulated_marathon_app_id',
            'MARATHON_HOST': fqdn,
        })
    elif framework == 'chronos':
        env.update({
            'CHRONOS_RESOURCE_DISK': str(instance_config.get_disk()),
            'CHRONOS_RESOURCE_CPU': str(instance_config.get_cpus()),
            'CHRONOS_RESOURCE_MEM': str(instance_config.get_mem()),
            'CHRONOS_JOB_OWNER': 'simulated-owner',
            'CHRONOS_JOB_RUN_TIME': str(int(time.time())),
            'CHRONOS_JOB_NAME': "{} {}".format(
                instance_config.get_service(), instance_config.get_instance(),
            ),
            'CHRONOS_JOB_RUN_ATTEMPT': str(0),
            # NOTE(review): lowercase key preserved as-is from the original.
            'mesos_task_id': 'ct:simulated-task-id',
        })
    return env
def get_spark_env(
    args,
    spark_conf,
    spark_ui_port,
):
    """Create the environment dict used to configure the spark docker container.

    :param args: parsed CLI namespace (reads cmd, work_dir, spark_args)
    :param spark_conf: pre-rendered spark options string for SPARK_OPTS
    :param spark_ui_port: port number for the history-server web UI
    :returns: dict mapping environment variable names to string values
    """
    spark_env = {}
    access_key, secret_key = get_aws_credentials(args)
    # Fix: only export AWS credentials when resolution actually produced a
    # key. The old code stored them unconditionally, so a failed lookup put
    # None values into the env dict (presumably rendered later as the literal
    # string "None" in the container environment — confirm against caller).
    # This also matches the guard used by the sibling get_spark_env versions.
    if access_key is not None:
        spark_env['AWS_ACCESS_KEY_ID'] = access_key
        spark_env['AWS_SECRET_ACCESS_KEY'] = secret_key
    spark_env['PAASTA_LAUNCHED_BY'] = get_possible_launched_by_user_variable_from_env()
    # Run spark (and mesos framework) as root.
    spark_env['SPARK_USER'] = '******'
    spark_env['SPARK_OPTS'] = spark_conf
    if args.cmd == 'jupyter':
        # Default configs to start the jupyter notebook server. work_dir is a
        # host:container mapping; jupyter state lives under the container half.
        dirs = args.work_dir.split(':')
        jupyter_dir = dirs[1] + '/.jupyter'
        spark_env['JUPYTER_RUNTIME_DIR'] = jupyter_dir
        spark_env['JUPYTER_DATA_DIR'] = jupyter_dir
    elif args.cmd == 'history-server':
        dirs = args.work_dir.split(':')
        spark_env['SPARK_LOG_DIR'] = dirs[1]
        if not args.spark_args or not args.spark_args.startswith(
            'spark.history.fs.logDirectory',
        ):
            paasta_print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        spark_env['SPARK_HISTORY_OPTS'] = '-D%s -Dspark.history.ui.port=%d' % (
            args.spark_args,
            spark_ui_port,
        )
        spark_env['SPARK_NO_DAEMONIZE'] = 'true'
    return spark_env
def get_spark_env(args, spark_conf, spark_ui_port, access_key, secret_key):
    """Build the environment dict for the spark docker container.

    AWS credentials are exported only when access_key was resolved.
    """
    env = {}
    if access_key is not None:
        env["AWS_ACCESS_KEY_ID"] = access_key
        env["AWS_SECRET_ACCESS_KEY"] = secret_key
    env["AWS_DEFAULT_REGION"] = args.aws_region
    env["PAASTA_LAUNCHED_BY"] = get_possible_launched_by_user_variable_from_env()
    env["PAASTA_INSTANCE_TYPE"] = "spark"
    # Run spark (and mesos framework) as root.
    env["SPARK_USER"] = "******"
    env["SPARK_OPTS"] = spark_conf
    if args.cmd == "jupyter-lab":
        # Default configs to start the jupyter notebook server
        for var in ("JUPYTER_RUNTIME_DIR", "JUPYTER_DATA_DIR", "JUPYTER_CONFIG_DIR"):
            env[var] = "/source/.jupyter"
    elif args.cmd == "history-server":
        # work_dir is a host:container mapping; logs live in the container half.
        env["SPARK_LOG_DIR"] = args.work_dir.split(":")[1]
        has_log_dir = bool(args.spark_args) and args.spark_args.startswith(
            "spark.history.fs.logDirectory"
        )
        if not has_log_dir:
            paasta_print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        env["SPARK_HISTORY_OPTS"] = "-D%s -Dspark.history.ui.port=%d" % (
            args.spark_args,
            spark_ui_port,
        )
        env["SPARK_DAEMON_CLASSPATH"] = "/opt/spark/extra_jars/*"
        env["SPARK_NO_DAEMONIZE"] = "true"
    return env