def __init__(
    self,
    bucket,
    local_dir=None,
    inst_data=None,
    prefix="dagster",
    use_ssl=True,
    verify=True,
    verify_cert_path=None,
    endpoint_url=None,
    skip_empty_files=False,
):
    _verify = False if not verify else verify_cert_path
    self._s3_session = boto3.resource(
        "s3", use_ssl=use_ssl, verify=_verify, endpoint_url=endpoint_url
    ).meta.client
    self._s3_bucket = check.str_param(bucket, "bucket")
    self._s3_prefix = check.str_param(prefix, "prefix")

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self._skip_empty_files = check.bool_param(skip_empty_files, "skip_empty_files")
def events_jar():
    git_repo_root = (
        subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
    )

    temp_dir = os.path.join(
        get_system_temp_directory(), "dagster_examples_tests", "event_pipeline_demo_tests"
    )

    mkdir_p(temp_dir)
    dst = os.path.join(temp_dir, "events.jar")

    if os.path.exists(dst):
        print("events jar already exists, skipping")  # pylint: disable=print-call
    else:
        subprocess.check_call(
            ["sbt", "events/assembly"], cwd=os.path.join(git_repo_root, "scala_modules")
        )

        src = os.path.join(
            git_repo_root,
            "scala_modules",
            "events/target/scala-2.11/events-assembly-0.1.0-SNAPSHOT.jar",
        )
        subprocess.check_call(["cp", src, dst])

    yield dst
def __init__(self, run_id, type_storage_plugin_registry=None, base_dir=None):
    self.run_id = check.str_param(run_id, 'run_id')
    type_storage_plugin_registry = check.inst_param(
        type_storage_plugin_registry
        if type_storage_plugin_registry
        else TypeStoragePluginRegistry(types_to_register={}),
        'type_storage_plugin_registry',
        TypeStoragePluginRegistry,
    )

    self._base_dir = os.path.abspath(
        os.path.expanduser(
            check.opt_nonempty_str_param(base_dir, 'base_dir', seven.get_system_temp_directory())
        )
    )
    check.invariant(
        os.path.isdir(self._base_dir),
        'Could not find a directory at the base_dir supplied to FileSystemIntermediateStore: '
        '{base_dir}'.format(base_dir=self._base_dir),
    )

    object_store = FileSystemObjectStore()

    root = object_store.key_for_paths([self.base_dir, 'dagster', 'runs', run_id, 'files'])

    super(FileSystemIntermediateStore, self).__init__(
        object_store, root=root, type_storage_plugin_registry=type_storage_plugin_registry
    )
def operator_for_solid(
    cls,
    handle,
    pipeline_name,
    environment_dict,
    mode,
    solid_name,
    step_keys,
    dag,
    dag_id,
    op_kwargs,
):
    tmp_dir = op_kwargs.pop('tmp_dir', DOCKER_TEMPDIR)
    host_tmp_dir = op_kwargs.pop('host_tmp_dir', seven.get_system_temp_directory())

    if 'storage' not in environment_dict:
        raise airflow_storage_exception(tmp_dir)

    # black 18.9b0 doesn't support py27-compatible formatting of the below invocation (omitting
    # the trailing comma after **op_kwargs) -- black 19.3b0 supports multiple python versions,
    # but currently doesn't know what to do with from __future__ import print_function -- see
    # https://github.com/ambv/black/issues/768
    # fmt: off
    return DagsterDockerOperator(
        step=solid_name,
        environment_dict=environment_dict,
        dag=dag,
        tmp_dir=tmp_dir,
        pipeline_name=pipeline_name,
        mode=mode,
        step_keys=step_keys,
        task_id=solid_name,
        host_tmp_dir=host_tmp_dir,
        **op_kwargs
    )
def test_file_system_intermediate_store():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool.inst(), ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool.inst(), ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('file:///')
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context, ['dslkfhjsdflkjfs']) is None
        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    # Load environment dict from either a preset or yaml file globs
    if preset_name:
        if config:
            raise click.UsageError("Can not use --preset with --config.")

        cli_args = {
            "fn_name": pipeline_name,
            "pipeline_name": pipeline_name,
            "module_name": module_name,
        }
        pipeline = recon_repo_for_cli_args(cli_args).get_definition().get_pipeline(pipeline_name)
        run_config = pipeline.get_preset(preset_name).run_config

    else:
        config = list(config)
        run_config = load_yaml_from_glob_list(config) if config else {}

    # If not provided by the user, ensure we have storage location defined
    if "intermediate_storage" not in run_config:
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, "dagster-airflow", pipeline_name)
        run_config["intermediate_storage"] = {
            "filesystem": {"config": {"base_dir": dagster_tmp_path}}
        }

    return run_config
def __init__(
    self,
    bucket,
    local_dir=None,
    inst_data=None,
    prefix="dagster",
    json_credentials_envvar=None,
):
    self._bucket_name = check.str_param(bucket, "bucket")
    self._prefix = check.str_param(prefix, "prefix")

    if json_credentials_envvar:
        json_info_str = os.environ.get(json_credentials_envvar)
        credentials_info = json.loads(json_info_str)
        self._bucket = (
            storage.Client()
            .from_service_account_info(credentials_info)
            .bucket(self._bucket_name)
        )
    else:
        self._bucket = storage.Client().bucket(self._bucket_name)

    # Check if the bucket exists
    check.invariant(self._bucket.exists())

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
def test_file_system_intermediate_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List[LowercaseString]).inst(),
                ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context, resolve_to_runtime_type(List[Bool]).inst(), ['list']
            ).obj == ['foo', 'bar']

        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def events_jar():
    git_repo_root = six.ensure_str(
        subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).strip()
    )

    temp_dir = os.path.join(
        get_system_temp_directory(), 'dagster_examples_tests', 'event_pipeline_demo_tests'
    )

    mkdir_p(temp_dir)
    dst = os.path.join(temp_dir, 'events.jar')

    if os.path.exists(dst):
        print('events jar already exists, skipping')
    else:
        subprocess.check_call(
            ['sbt', 'events/assembly'], cwd=os.path.join(git_repo_root, 'scala_modules')
        )

        src = os.path.join(
            git_repo_root,
            'scala_modules',
            'events/target/scala-2.11/events-assembly-0.1.0-SNAPSHOT.jar',
        )
        subprocess.check_call(['cp', src, dst])

    yield dst
def get_active_repository_data_from_image(image):
    check.str_param(image, 'image')

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        run_serialized_container_command(
            image=image,
            command='dagster repository snapshot {output_file}'.format(
                output_file=os.path.join(DEFAULT_INTERNAL_VOLUME, output_file_name)
            ),
            volumes={tmp_dir: {'bind': DEFAULT_INTERNAL_VOLUME, 'mode': DEFAULT_MODE}},
        )

        active_repo_data = _get_active_repo_data(os.path.join(tmp_dir, output_file_name))
        if not isinstance(active_repo_data, ActiveRepositoryData):
            raise DagsterInvariantViolationError(
                "Deserialized snapshot is of type {received} must be a ActiveRepositoryData".format(
                    received=type(active_repo_data)
                )
            )
        return active_repo_data
def __init__(
    self,
    bucket,
    local_dir=None,
    inst_data=None,
    prefix='dagster',
    use_ssl=True,
    verify=True,
    verify_cert_path=None,
    endpoint_url=None,
):
    _verify = False if not verify else verify_cert_path
    self._s3_session = create_s3_session(
        use_ssl=use_ssl, verify=_verify, endpoint_url=endpoint_url
    )
    self._s3_bucket = check.str_param(bucket, 'bucket')
    self._s3_prefix = check.str_param(prefix, 'prefix')
    self._download_urls = {}

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)
    self._inst_data = check.opt_inst_param(inst_data, 'inst_data', ConfigurableClassData)
def conditionally_fail(_):
    if os.path.isfile(
        os.path.join(get_system_temp_directory(), "chained_failure_pipeline_conditionally_fail")
    ):
        raise Exception("blah")

    return "hello"
def get_papermill_parameters(step_context, inputs, output_log_path):
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        raise DagstermillError(
            "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
            "Use the reconstructable() function if executing from python"
        )

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = step_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), "Dagstermill solids cannot have inputs named {input_name}".format(input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type,
            input_value,
            os.path.join(marshal_dir, f"{str(step_context.solid_handle)}-input-{input_name}"),
        )
        parameters[input_name] = parameter_value

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(step_context.instance.get_ref())

    return parameters
def open_server_process(
    port,
    socket,
    loadable_target_origin=None,
    max_workers=1,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
):
    check.invariant((port or socket) and not (port and socket), "Set only port or socket")
    check.opt_inst_param(loadable_target_origin, "loadable_target_origin", LoadableTargetOrigin)
    check.int_param(max_workers, "max_workers")

    output_file = os.path.join(
        get_system_temp_directory(), "grpc-server-startup-{uuid}".format(uuid=uuid.uuid4().hex)
    )

    subprocess_args = (
        [
            loadable_target_origin.executable_path
            if loadable_target_origin and loadable_target_origin.executable_path
            else sys.executable,
            "-m",
            "dagster.grpc",
        ]
        + (["--port", str(port)] if port else [])
        + (["--socket", socket] if socket else [])
        + ["-n", str(max_workers)]
        + (["--heartbeat"] if heartbeat else [])
        + (["--heartbeat-timeout", str(heartbeat_timeout)] if heartbeat_timeout else [])
        + (["--lazy-load-user-code"] if lazy_load_user_code else [])
        + (["--ipc-output-file", output_file])
    )

    if loadable_target_origin:
        subprocess_args += (
            (
                (
                    ["-f", loadable_target_origin.python_file]
                    + (
                        ["-d", loadable_target_origin.working_directory]
                        if loadable_target_origin.working_directory
                        else ["--empty-working-directory"]
                    )
                )
                if loadable_target_origin.python_file
                else []
            )
            + (
                ["-m", loadable_target_origin.module_name]
                if loadable_target_origin.module_name
                else []
            )
            + (["-a", loadable_target_origin.attribute] if loadable_target_origin.attribute else [])
        )

    server_process = open_ipc_subprocess(subprocess_args)

    try:
        wait_for_grpc_server(output_file)
    except:
        if server_process.poll() is None:
            server_process.terminate()
        raise

    return server_process
def test_write_configs():
    ec2_config = EC2Config(
        remote_host='foo',
        region='us-west-1',
        security_group_id='sg-12345',
        key_pair_name='foobar',
        key_file_path='/some/path',
        ami_id='ami-12345',
    )

    rds_config = RDSConfig(
        instance_name='foo', instance_uri='foo-bar.amazonaws.com', password='******'
    )

    # Ensure unique dir for test
    tmp_dir = os.path.join(seven.get_system_temp_directory(), uuid.uuid4().hex)

    outfile = os.path.join(tmp_dir, HOST_CONFIG_FILE)

    ec2_config.save(tmp_dir)
    rds_config.save(tmp_dir)

    with open(outfile) as f:
        record = yaml.load(f)
        ec2_config_dict = record['ec2']
        rds_config_dict = record['rds']

    assert ec2_config_dict['remote_host'] == 'foo'
    assert ec2_config_dict['region'] == 'us-west-1'
    assert ec2_config_dict['security_group_id'] == 'sg-12345'
    assert ec2_config_dict['key_pair_name'] == 'foobar'
    assert ec2_config_dict['key_file_path'] == '/some/path'
    assert ec2_config_dict['ami_id'] == 'ami-12345'
    assert EC2Config.load(tmp_dir) == EC2Config(**ec2_config_dict) == ec2_config

    assert rds_config_dict['instance_name'] == 'foo'
    assert rds_config_dict['instance_uri'] == 'foo-bar.amazonaws.com'
    assert rds_config_dict['storage_size_gb'] == 20
    assert rds_config_dict['db_engine'] == 'postgres'
    assert rds_config_dict['db_engine_version'] == '11.5'
    assert RDSConfig.load(tmp_dir) == RDSConfig(**rds_config_dict)

    # Delete both configs
    res = rds_config.delete(tmp_dir)
    assert res
    assert not RDSConfig.exists(tmp_dir)

    res = ec2_config.delete(tmp_dir)
    assert res
    assert not EC2Config.exists(tmp_dir)

    # Try to delete non-existent config
    res = rds_config.delete(tmp_dir)
    assert not res
def __init__(self, bucket, local_dir=None, inst_data=None):
    self._s3_session = create_s3_session()
    self._s3_bucket = check.str_param(bucket, 'bucket')
    self._download_urls = {}

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)

    super(S3ComputeLogManager, self).__init__(inst_data=inst_data)
def __init__(self, bucket, local_dir=None, inst_data=None, prefix='dagster'):
    self._s3_session = create_s3_session()
    self._s3_bucket = check.str_param(bucket, 'bucket')
    self._s3_prefix = check.str_param(prefix, 'prefix')
    self._download_urls = {}

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)
    self._inst_data = check.opt_inst_param(inst_data, 'inst_data', ConfigurableClassData)
def test_ssh_sftp(sftpserver):
    tmp_path = get_system_temp_directory()
    readme_file = os.path.join(tmp_path, "readme.txt")

    @solid(
        config_schema={
            "local_filepath": Field(str, is_required=True, description="local file path to get"),
            "remote_filepath": Field(str, is_required=True, description="remote file path to get"),
        },
        required_resource_keys={"ssh_resource"},
    )
    def sftp_solid_get(context):
        local_filepath = context.solid_config.get("local_filepath")
        remote_filepath = context.solid_config.get("remote_filepath")
        return context.resources.ssh_resource.sftp_get(remote_filepath, local_filepath)

    with sftpserver.serve_content({"a_dir": {"readme.txt": "hello, world"}}):
        result = execute_solid(
            sftp_solid_get,
            ModeDefinition(resource_defs={"ssh_resource": sshresource}),
            run_config={
                "solids": {
                    "sftp_solid_get": {
                        "config": {
                            "local_filepath": readme_file,
                            "remote_filepath": "a_dir/readme.txt",
                        }
                    }
                },
                "resources": {
                    "ssh_resource": {
                        "config": {
                            "remote_host": sftpserver.host,
                            "remote_port": sftpserver.port,
                            "username": "******",
                            "password": "******",
                            "no_host_key_check": True,
                        }
                    }
                },
            },
        )

    assert result.success

    with open(readme_file, "rb") as f:
        contents = f.read()
        assert b"hello, world" in contents
def test_ssh_sftp(sftpserver):
    tmp_path = get_system_temp_directory()
    readme_file = os.path.join(tmp_path, 'readme.txt')

    @solid(
        config_schema={
            'local_filepath': Field(str, is_required=True, description='local file path to get'),
            'remote_filepath': Field(str, is_required=True, description='remote file path to get'),
        },
        required_resource_keys={'ssh_resource'},
    )
    def sftp_solid_get(context):
        local_filepath = context.solid_config.get('local_filepath')
        remote_filepath = context.solid_config.get('remote_filepath')
        return context.resources.ssh_resource.sftp_get(remote_filepath, local_filepath)

    with sftpserver.serve_content({'a_dir': {'readme.txt': 'hello, world'}}):
        result = execute_solid(
            sftp_solid_get,
            ModeDefinition(resource_defs={'ssh_resource': sshresource}),
            run_config={
                'solids': {
                    'sftp_solid_get': {
                        'config': {
                            'local_filepath': readme_file,
                            'remote_filepath': 'a_dir/readme.txt',
                        }
                    }
                },
                'resources': {
                    'ssh_resource': {
                        'config': {
                            'remote_host': sftpserver.host,
                            'remote_port': sftpserver.port,
                            'username': '******',
                            'password': '******',
                            'no_host_key_check': True,
                        }
                    }
                },
            },
        )

    assert result.success

    with open(readme_file, 'rb') as f:
        contents = f.read()
        assert b'hello, world' in contents
def get_papermill_parameters(step_context, inputs, output_log_path, compute_descriptor):
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        if compute_descriptor == "solid":
            raise DagstermillError(
                "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
                "Use the reconstructable() function if executing from python"
            )
        else:
            raise DagstermillError(
                "Can't execute a dagstermill op from a job that is not reconstructable. "
                "Use the reconstructable() function if executing from python"
            )

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()
    dm_step_key = step_context.step.key

    parameters = {}

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(step_context.instance.get_ref())
    parameters["__dm_step_key"] = dm_step_key
    parameters["__dm_input_names"] = list(inputs.keys())

    return parameters
def test_remove_ssh_key():
    # Ensure SSH is running
    subprocess.call(['ssh-agent', '-s'])

    test_key_path = os.path.join(seven.get_system_temp_directory(), 'test.pem')

    with open(test_key_path, 'wb') as f:
        f.write(TEST_PEM_PRIVATE_KEY)
    os.chmod(test_key_path, 0o600)

    subprocess.call(['ssh-add', '-D'])
    assert remove_ssh_key('does/not/matter')

    subprocess.call(['ssh-add', test_key_path])
    assert not remove_ssh_key('/key/does/not/exist.pem')
    assert remove_ssh_key(test_key_path)
def __init__(self, dagster_operator_parameters, *args):
    kwargs = dagster_operator_parameters.op_kwargs
    tmp_dir = kwargs.pop("tmp_dir", DOCKER_TEMPDIR)
    host_tmp_dir = kwargs.pop("host_tmp_dir", seven.get_system_temp_directory())
    self.host_tmp_dir = host_tmp_dir

    self.docker_conn_id_set = kwargs.get("docker_conn_id") is not None
    self.run_config = dagster_operator_parameters.run_config
    self.pipeline_name = dagster_operator_parameters.pipeline_name
    self.pipeline_snapshot = dagster_operator_parameters.pipeline_snapshot
    self.execution_plan_snapshot = dagster_operator_parameters.execution_plan_snapshot
    self.parent_pipeline_snapshot = dagster_operator_parameters.parent_pipeline_snapshot
    self.mode = dagster_operator_parameters.mode
    self.step_keys = dagster_operator_parameters.step_keys
    self.recon_repo = dagster_operator_parameters.recon_repo
    self._run_id = None

    self.instance_ref = dagster_operator_parameters.instance_ref
    check.invariant(self.instance_ref)
    self.instance = DagsterInstance.from_ref(self.instance_ref)

    # These shenanigans are so we can override DockerOperator.get_hook in order to configure
    # a docker client using docker.from_env, rather than messing with the logic of
    # DockerOperator.execute
    if not self.docker_conn_id_set:
        try:
            from_env().version()
        except Exception:
            pass
        else:
            kwargs["docker_conn_id"] = True

    if "environment" not in kwargs:
        kwargs["environment"] = get_aws_environment()

    super(DagsterDockerOperator, self).__init__(
        task_id=dagster_operator_parameters.task_id,
        dag=dagster_operator_parameters.dag,
        tmp_dir=tmp_dir,
        host_tmp_dir=host_tmp_dir,
        xcom_push=True,
        # We do this because log lines won't necessarily be emitted in order (!) -- so we can't
        # just check the last log line to see if it's JSON.
        xcom_all=True,
        *args,
        **kwargs,
    )
def __init__(
    self,
    bucket,
    local_dir=None,
    inst_data=None,
    prefix="dagster",
):
    self._bucket_name = check.str_param(bucket, "bucket")
    self._prefix = check.str_param(prefix, "prefix")
    self._bucket = storage.Client().get_bucket(self._bucket_name)

    # proxy calls to local compute log manager (for subscriptions, etc)
    if not local_dir:
        local_dir = seven.get_system_temp_directory()

    self.local_manager = LocalComputeLogManager(local_dir)
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
def __init__(self, run_id, types_to_register=None, base_dir=None):
    self.run_id = check.str_param(run_id, 'run_id')
    self.storage_mode = RunStorageMode.FILESYSTEM

    self._base_dir = os.path.abspath(
        os.path.expanduser(
            check.opt_nonempty_str_param(base_dir, 'base_dir', seven.get_system_temp_directory())
        )
    )
    check.invariant(
        os.path.isdir(self._base_dir),
        'Could not find a directory at the base_dir supplied to FileSystemObjectStore: '
        '{base_dir}'.format(base_dir=self._base_dir),
    )

    self.root = get_run_files_directory(self.base_dir, run_id)

    super(FileSystemObjectStore, self).__init__(types_to_register)
def remove_ssh_key(key_file_path):
    # We have to clean up after ourselves to avoid "Too many authentication failures" issue.
    Term.waiting('Removing SSH key from authentication agent...')

    # AWS only gives us the private key contents; ssh-add uses the private key for adding but the
    # public key for removing
    try:
        public_keys = (
            six.ensure_str(subprocess.check_output(['ssh-add', '-L'])).strip().split('\n')
        )
    except subprocess.CalledProcessError:
        Term.rewind()
        Term.info('No identities found, skipping')
        return True

    filtered_public_keys = [key for key in public_keys if key_file_path in key]
    public_key = filtered_public_keys[0] if filtered_public_keys else None

    if public_key:
        tmp_pub_file = os.path.join(
            seven.get_system_temp_directory(), uuid.uuid4().hex + '-tmp-pubkey'
        )

        with open(tmp_pub_file, 'wb') as f:
            f.write(six.ensure_binary(public_key))

        res = subprocess.Popen(
            ['ssh-add', '-d', tmp_pub_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        ).communicate()
        res = six.ensure_str(res[0])

        os.unlink(tmp_pub_file)

        if 'Identity removed' in res:
            Term.rewind()
            Term.success('key deleted successfully')
            return True
        else:
            Term.warning('Could not remove key, error: %s' % res)
            return False
    else:
        Term.rewind()
        Term.info('key not found, skipping')
        return False
def sync_dagster_yaml(ec2_config, rds_config):
    '''Configure Dagster instance to use PG storage by putting a dagster.yaml file in the remote
    DAGSTER_HOME directory
    '''
    with open(os.path.join(os.path.dirname(__file__), 'conf', 'dagster.template.yaml'), 'rb') as f:
        dagster_yaml = six.ensure_str(f.read()).format(
            username=rds_config.username,
            password=rds_config.password,
            hostname=rds_config.instance_uri,
            db_name=rds_config.db_name,
            port=DEFAULT_RDS_PORT,
        )

    tmp_file = os.path.join(seven.get_system_temp_directory(), 'dagster.yaml')

    with open(tmp_file, 'wb') as f:
        f.write(six.ensure_binary(dagster_yaml))

    rsync_to_remote(ec2_config.key_file_path, tmp_file, ec2_config.remote_host, SERVER_DAGSTER_HOME)
def sync_dagster_yaml(ec2_config, rds_config):
    '''Configure Dagster instance to use PG storage by putting a dagster.yaml file in the remote
    DAGSTER_HOME directory
    '''
    with open(os.path.join(os.path.dirname(__file__), 'conf', 'dagster.template.yaml'), 'rb') as f:
        dagster_yaml = six.ensure_str(f.read())

    dagster_yaml = (
        dagster_yaml.replace('{username}', rds_config.username)
        .replace('{password}', rds_config.password)
        .replace('{host}', rds_config.instance_uri)
        .replace('{database}', rds_config.db_name)
    )

    tmp_file = os.path.join(seven.get_system_temp_directory(), 'dagster.yaml')

    with open(tmp_file, 'wb') as f:
        f.write(six.ensure_binary(dagster_yaml))

    rsync_to_remote(ec2_config.key_file_path, tmp_file, ec2_config.remote_host, SERVER_DAGSTER_HOME)
def execute_pipeline_iterator_from_image(
    image, pipeline_name, environment_dict=None, mode=None, solid_subset=None
):
    # This method currently depends on file mounts, and will not work when executing within
    # a docker container
    check.str_param(image, 'image')
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_dict_param(environment_dict, 'environment-dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_list_param(solid_subset, 'solid-subset', of_type=str)

    if not environment_dict:
        environment_dict = {}

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        command = (
            "dagster api execute_pipeline -y repository.yaml {pipeline_name} "
            "{output_file} --environment-dict='{environment_dict}' --mode={mode}".format(
                pipeline_name=pipeline_name,
                output_file=os.path.join(DEFAULT_INTERNAL_VOLUME, output_file_name),
                environment_dict=json.dumps(environment_dict),
                mode=mode,
            )
        )

        if solid_subset:
            command += " --solid_subset={solid_subset}".format(solid_subset=",".join(solid_subset))

        for event in run_detached_container_command(
            image=image,
            command=command,
            volumes={tmp_dir: {'bind': DEFAULT_INTERNAL_VOLUME, 'mode': DEFAULT_MODE}},
            output_file=os.path.join(tmp_dir, output_file_name),
        ):
            yield event
def get_active_repository_data_from_image(image):
    check.str_param(image, 'image')

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        command = 'dagster api snapshot repository'.format(
            output_file=os.path.join(DEFAULT_INTERNAL_VOLUME, output_file_name)
        )

        output = run_serialized_container_command(
            image=image,
            command=command,
            volumes={tmp_dir: {'bind': DEFAULT_INTERNAL_VOLUME, 'mode': DEFAULT_MODE}},
        )

        if len(output) != 1:
            print(output)
            raise DagsterInvariantViolationError(
                "Running command {command} in container {image} resulted in output of length "
                "{actual} lines, expected {expected} lines".format(
                    command=command, image=image, actual=len(output), expected=1
                )
            )

        serialized_active_repo_data = output[0]
        active_repo_data = deserialize_json_to_dagster_namedtuple(serialized_active_repo_data)

        if not isinstance(active_repo_data, ActiveRepositoryData):
            raise DagsterInvariantViolationError(
                "Deserialized snapshot is of type {received} must be a ActiveRepositoryData".format(
                    received=type(active_repo_data)
                )
            )
        return active_repo_data
def test_file_system_intermediate_store_composite_types():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                [True, False], context, resolve_to_runtime_type(List(Bool)).inst(), ['bool']
            )
            assert intermediate_store.has_object(context, ['bool'])
            assert intermediate_store.get_object(
                context, resolve_to_runtime_type(List(Bool)).inst(), ['bool']
            ) == [True, False]

        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass