def get_undefined_variables_in_metadata_db(remote: Optional[str], var_ids: List[str]):
    """Return the subset of ``var_ids`` that are not defined in the metadata store.

    :param remote: remote whose metadata store is queried, or None for local.
    :param var_ids: variable ids to check.
    :return: ids for which the store raised MetadataObjectNotFound.
    """
    # The store does not depend on var_id — create it once, not per iteration.
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    undefined_variables = []
    for var_id in var_ids:
        try:
            metadata_store.get_variable(var_id)
        except MetadataObjectNotFound:
            undefined_variables.append(var_id)
    return undefined_variables
def get_undefined_connections_in_metadata_db(remote: Optional[str], conn_ids: List[str]):
    """Return the subset of ``conn_ids`` that are not defined in the metadata store.

    :param remote: remote whose metadata store is queried, or None for local.
    :param conn_ids: connection ids to check.
    :return: ids for which the store raised MetadataObjectNotFound.
    """
    # The store does not depend on conn_id — create it once, not per iteration.
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    undefined_connections = []
    for conn_id in conn_ids:
        try:
            metadata_store.get_connection(conn_id)
        except MetadataObjectNotFound:
            undefined_connections.append(conn_id)
    return undefined_connections
def list_dags(remote: Optional[str], long: bool, json_output):
    """Print the names of deployed DAGs (with deployment dates when ``long``)
    and, for local runs, report broken DAGs on stderr in red."""
    set_settings_from_remote(remote)
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    if long:
        headers = ['DAG_NAME', 'DEPLOYMENT_DATE']
        rows = [
            [deployment.dag_name, deployment.deployment_date.isoformat()]
            for deployment in metadata_store.get_dag_deployments()
        ]
        print(tabulate(rows, headers, 'plain'))
        if not remote:
            errors_by_dag = get_dag_errors()
            if errors_by_dag:
                error_headers = ['DAG_NAME', 'ERROR LOCATION', 'ERROR MESSAGE']
                error_rows = [
                    [name, errs[0]['loc'], errs[0]['msg']]
                    for name, errs in errors_by_dag.items()
                ]
                print(colored(tabulate(error_rows, error_headers, 'plain'), 'red'),
                      file=sys.stderr)
    else:
        names = sorted({d.dag_name for d in metadata_store.get_dag_deployments()})
        if json_output:
            print(json.dumps(names))
        else:
            for name in names:
                print(name)
        if not remote:
            # Broken DAGs are listed by name only in short mode.
            for name in get_dag_errors():
                print(colored(name, 'red'), file=sys.stderr)
def add_variable(remote: Optional[str], var_id: str, var_type: str, contents):
    """Add variable to the metadata store"""
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    # var_type is case-insensitive; VariableType members are upper-case.
    new_variable = Variable(var_id, VariableType[var_type.upper()], contents)
    store.set_variable(new_variable)
    print(f'Variable {var_id} added')
def load_variable(remote: Optional[str], file: str):
    """Read variable from file and add it to the metadata store"""
    loaded_variable = Variable.from_file(file)
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    store.set_variable(loaded_variable)
    print(f'Variable {loaded_variable.id} added')
def build_all_dags_airflow(remote: Optional[str], matching: Optional[str] = None):
    """Transpile every loaded DAG into $AIRFLOW_HOME/dags/typhoon_managed/,
    then build all components for Airflow.

    :param remote: remote name; None enables debug (local) mode in the
        generated DAG files.
    :param matching: optional regex; only DAG names matching it are built.
    """
    airflow_home = Path(os.environ['AIRFLOW_HOME'])
    target_folder: Path = airflow_home / 'dags/typhoon_managed/'
    # Ensure parents exist, then wipe and recreate the managed folder so
    # builds of removed DAGs do not linger.
    target_folder.mkdir(parents=True, exist_ok=True)
    rmtree(str(target_folder), ignore_errors=True)
    target_folder.mkdir()
    print('Build all DAGs...')
    # The metadata store does not depend on the DAG being built; fetch once
    # instead of once per DAG (was inside the loop).
    store: AirflowMetadataStore = Settings.metadata_store()
    dags = load_dag_definitions(ignore_errors=True)
    for dag, _ in dags:
        print(f'Found DAG {dag.name}')
        if not matching or re.match(matching, dag.name):
            dag_target_folder = target_folder / dag.name
            dag_target_folder.mkdir(parents=True, exist_ok=True)
            (dag_target_folder / '__init__.py').write_text('')
            start_date = None
            with set_airflow_db(store.db_path, store.fernet_key) as db:
                dag_run = db.get_first_dag_run(dag.name)
                if dag_run:
                    # Compiled DAGs expect a naive datetime; drop the tzinfo.
                    start_date = dag_run.execution_date.replace(tzinfo=None)
            compiled_dag = AirflowDagFile(dag, start_date=start_date,
                                          debug_mode=remote is None).render()
            (dag_target_folder / f'{dag.name}.py').write_text(compiled_dag)
            tasks_code = TasksFile(dag.tasks).render()
            (dag_target_folder / 'tasks.py').write_text(tasks_code)
    for component, _ in load_components(ignore_errors=False, kind='all'):
        print(f'Building component {component.name}...')
        build_component_for_airflow(component, target_folder)
def get_hook(conn_id: str) -> HookInterface:
    """Resolve a hook for ``conn_id``; user-defined hooks take precedence
    over built-in hook classes."""
    store = Settings.metadata_store()
    connection = store.get_connection(conn_id)
    user_hook = get_user_defined_hook(conn_id, store)
    if user_hook:
        return user_hook
    builtin_hook_class = get_hooks_info()[connection.conn_type]
    return builtin_hook_class(connection.get_connection_params())
def ddb_store():
    """Point Settings at a local DynamoDB instance and return a migrated
    metadata store (test fixture helper)."""
    Settings.typhoon_home = '/tmp/foo'
    Settings.project_name = 'unittests'
    # Plain literal: there is nothing to interpolate (was a pointless f-string).
    Settings.metadata_db_url = 'dynamodb:Host=localhost:8080;Region=eu-west-1'
    store = Settings.metadata_store()
    # Idempotently create the tables before handing the store to tests.
    store.migrate()
    return store
def test_set_and_get_variables(cfg_path):
    """Stored variables are listed back (order unspecified), both as objects
    and as dicts."""
    store = Settings.metadata_store()
    store.set_variable(sample_var)
    store.set_variable(other_sample_var)
    # Fetch once per assertion (was fetched twice); order is not guaranteed,
    # so accept either permutation.
    variables = store.get_variables()
    assert variables in ([sample_var, other_sample_var],
                         [other_sample_var, sample_var])
    variable_dicts = store.get_variables(to_dict=True)
    assert variable_dicts in ([sample_var.__dict__, other_sample_var.__dict__],
                              [other_sample_var.__dict__, sample_var.__dict__])
def test_delete_variable(cfg_path):
    """After deletion, lookups by variable object and by id both raise."""
    store = Settings.metadata_store()
    store.set_variable(sample_var)
    store.delete_variable(sample_var)
    for lookup_key in (sample_var, sample_var.id):
        with pytest.raises(MetadataObjectNotFound):
            store.get_variable(lookup_key)
def add_connection(remote: Optional[str], conn_id: str, conn_env: str):
    """Add connection to the metadata store"""
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    # Resolve the connection parameters for the requested environment from
    # the local connections file.
    local_params = connections.get_connection_local(conn_id, conn_env)
    store.set_connection(Connection(conn_id=conn_id, **asdict(local_params)))
    print(f'Connection {conn_id} added')
def test_delete_connection(cfg_path):
    """After deletion, lookups by connection object and by id both raise."""
    store = Settings.metadata_store()
    store.set_connection(sample_conn)
    store.delete_connection(sample_conn)
    for lookup_key in (sample_conn, sample_conn.conn_id):
        with pytest.raises(MetadataObjectNotFound):
            store.get_connection(lookup_key)
def test_set_and_get_connections(cfg_path):
    """Stored connections are listed back (order unspecified), both as
    objects and as dicts."""
    store = Settings.metadata_store()
    store.set_connection(sample_conn)
    store.set_connection(other_sample_conn)
    # Fetch once per assertion (was fetched twice); order is not guaranteed,
    # so accept either permutation.
    conns = store.get_connections()
    assert conns in ([sample_conn, other_sample_conn],
                     [other_sample_conn, sample_conn])
    conn_dicts = store.get_connections(to_dict=True)
    assert conn_dicts in ([sample_conn.__dict__, other_sample_conn.__dict__],
                          [other_sample_conn.__dict__, sample_conn.__dict__])
def test_typhoon_config(tmp_path):
    """Assigned Settings values are reflected back and a sqlite URL selects
    the SQLite metadata store."""
    sqlite_url = f'sqlite:{tmp_path/"test.db"}'
    Settings.typhoon_home = tmp_path
    Settings.metadata_db_url = sqlite_url
    Settings.project_name = 'integrationtests'
    assert Settings.typhoon_home == tmp_path
    assert isinstance(Settings.metadata_store(), SQLiteMetadataStore)
    assert Settings.project_name == 'integrationtests'
def test_sqlite_metadata_store(typhoon_home):
    """The SQLite store round-trips both connections and variables."""
    store = Settings.metadata_store()
    assert isinstance(store, SQLiteMetadataStore)
    stored_conn = Connection(conn_id='foo', conn_type='s3')
    store.set_connection(stored_conn)
    assert store.get_connection('foo') == stored_conn
    stored_var = Variable(id='bar', type=VariableType.STRING, contents='lorem ipsum')
    store.set_variable(stored_var)
    assert store.get_variable('bar') == stored_var
def build_dag(dag: DAGDefinitionV2, dag_file: Path, deployment_date: datetime, remote: Optional[str]):
    """Transpile one DAG into the out directory, deploy its requirements and
    user code, and record the deployment in the metadata store."""
    # Work on the plain-dict form of the definition from here on.
    dag_dict = dag.dict()
    dag_folder = Settings.out_directory / dag_dict['name']
    transpile_dag_and_store(dag_dict, dag_folder, debug_mode=remote is None)
    deploy_dag_requirements(dag_dict, typhoon_version_is_local(), Settings.typhoon_version)
    if not remote:
        print('Setting up user defined code as symlink for debugging...')
    copy_user_defined_code(dag_dict, symlink=remote is None)
    try:
        deployment = DagDeployment(
            dag_name=dag_dict['name'],
            deployment_date=deployment_date,
            dag_code=dag_file.read_text(),
        )
        Settings.metadata_store(aws_profile=None).set_dag_deployment(deployment)
    except TyphoonResourceNotFoundError:
        # Best effort: missing deployments table only skips the bookkeeping.
        print(
            f'WARNING: DynamoDB table {Settings.dag_deployments_table_name} does not exist. Skipping deployment metadata...'
        )
def get_user_defined_hook(conn_id: str, metadata_store: MetadataStoreInterface = None) -> Optional[HookInterface]:
    """Search the project's hooks/ folder for a hook class matching the
    connection's ``conn_type`` and return an instance of it.

    :param conn_id: connection id whose type selects the hook class.
    :param metadata_store: store to read the connection from; defaults to the
        local store.
    :return: an instantiated hook, or None when no class matches (the previous
        annotation claimed ``HookInterface`` but the fall-through returned
        None implicitly).
    """
    if metadata_store is None:
        metadata_store = Settings.metadata_store()
    conn = metadata_store.get_connection(conn_id)
    hooks_files = (Settings.typhoon_home / 'hooks').rglob('*.py')
    for hooks_file in hooks_files:
        # Import each hooks module directly from its file path.
        spec = importlib.util.spec_from_file_location(str(hooks_file).split('.py')[0], str(hooks_file))
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        for cls_name, cls in inspect.getmembers(module, inspect.isclass):
            # Classes without a conn_type attribute are skipped.
            conn_type = getattr(cls, 'conn_type', None)
            if conn_type == conn.conn_type:
                return cls(conn.get_connection_params())
    return None
def list_connections(remote: Optional[str], long: bool):
    """List connections in the metadata store"""
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    stored_connections = store.get_connections()
    if not long:
        for connection in stored_connections:
            print(connection.conn_id)
        return
    headers = ['CONN_ID', 'TYPE', 'HOST', 'PORT', 'SCHEMA']
    rows = [
        [c.conn_id, c.conn_type, c.host, c.port, c.schema]
        for c in stored_connections
    ]
    print(tabulate(rows, headers, 'plain'))
def set_settings_from_remote(remote: Optional[str]):
    """Point the global Settings at the given remote's metadata configuration.

    No-op when ``remote`` is falsy (the annotation was ``str`` although every
    caller passes ``Optional[str]``). Exits the process with status -1 when
    the remote is not declared in .typhoonremotes.
    """
    if remote:
        if remote not in Remotes.remotes_config:
            print(
                f'Remote {remote} is not defined in .typhoonremotes. Found : {list(Remotes.remotes_config.keys())}',
                file=sys.stderr)
            sys.exit(-1)
        Settings.metadata_db_url = Remotes.metadata_db_url(remote)
        # Airflow-backed stores additionally need the remote's fernet key.
        if Settings.metadata_store(
                Remotes.aws_profile(remote)).name == 'airflow':
            Settings.fernet_key = Remotes.fernet_key(remote)
        if Remotes.use_name_as_suffix(remote):
            Settings.metadata_suffix = remote
def dags_without_deploy(remote: Optional[str]) -> List[str]:
    """Return names of active DAGs whose current code has no matching
    deployment recorded in the metadata store.

    :param remote: remote whose metadata store is queried, or None for local.
    """
    add_yaml_constructors()
    # The store does not depend on the DAG being inspected; create it once
    # instead of once per DAG file (was inside the loop).
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    undeployed_dags = []
    for dag_code in get_dags_contents(Settings.dags_directory):
        loaded_dag = yaml.load(dag_code, yaml.FullLoader)
        dag_deployment = DagDeployment(dag_name=loaded_dag['name'],
                                       deployment_date=datetime.utcnow(),
                                       dag_code=dag_code)
        # DAGs default to active when the key is absent.
        if loaded_dag.get('active', True):
            try:
                _ = metadata_store.get_dag_deployment(
                    dag_deployment.deployment_hash)
            except MetadataObjectNotFound:
                undeployed_dags.append(dag_deployment.dag_name)
    return undeployed_dags
def list_variables(remote: Optional[str], long: bool):
    """List variables in the metadata store"""
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    if not long:
        for var in store.get_variables():
            print(var.id)
        return
    max_len_var = 40

    def render_contents(var: Variable) -> str:
        # Numbers print bare; string contents are quoted and truncated to
        # max_len_var characters for display.
        if var.type == VariableType.NUMBER:
            return var.contents
        if len(var.contents) < max_len_var:
            return f'"{var.contents}"'
        return f'"{var.contents[:max_len_var]}"...'

    headers = ['VAR_ID', 'TYPE', 'CONTENT']
    rows = [[var.id, var.type, render_contents(var)]
            for var in store.get_variables()]
    print(tabulate(rows, headers, 'plain'))
def generate_terraform_files(remote: Optional[str] = None, force: bool = False, minimal: bool = False):
    """Render the project's terraform files into <typhoon_home>/terraform.

    Copies main.tf, renders <env>.tfvars from a jinja2 template, and (unless
    ``minimal``) copies the .tf file for the configured metadata store.

    :param remote: remote to generate for; None uses the 'test' env name.
    :param force: overwrite an existing terraform folder instead of exiting.
    :param minimal: omit metadata-store tables/URLs from the tfvars and skip
        the metadata-store .tf file.
    """
    terraform_dest_folder = (Settings.typhoon_home / 'terraform')
    # Refuse to clobber an existing folder unless --force was given.
    if not force and terraform_dest_folder.exists():
        print(
            f'Cannot generate terraform files because the folder exists already {terraform_dest_folder}'
        )
        print('Run with -f/--force to force overwrite')
        exit(1)
    main_tf_file = TERRAFORM_FOLDER_PATH / 'main.tf'
    env_name = remote or 'test'
    tfvars_template = jinja2.Template(
        (TERRAFORM_FOLDER_PATH / 'env.tfvars.j2').read_text())
    rendered_tfvars = tfvars_template.render(
        dict(
            env=env_name,
            # e.g. 'python3.8', derived from the interpreter running this CLI.
            runtime='python{}.{}'.format(*sys.version_info),
            # In minimal mode all metadata-store settings render as None.
            connections_table=None
            if minimal else Settings.connections_table_name,
            variables_table=None if minimal else Settings.variables_table_name,
            dag_deployments_table=None
            if minimal else Settings.dag_deployments_table_name,
            metadata_db_url=None
            if minimal else Remotes.metadata_db_url(remote),
            metadata_suffix=None if minimal else Settings.metadata_suffix,
            s3_bucket=Remotes.s3_bucket(remote) if remote else '',
            project_name=Settings.project_name,
        ))
    terraform_dest_folder.mkdir(exist_ok=True)
    shutil.copy(str(main_tf_file), str(terraform_dest_folder))
    if not minimal:
        # Pick the .tf file matching the concrete metadata store backend.
        metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
        metadata_store_tf_file = TERRAFORM_FOLDER_PATH / f'metadata_stores/{metadata_store.name}.tf'
        shutil.copy(str(metadata_store_tf_file), str(terraform_dest_folder))
    (terraform_dest_folder / f'{env_name}.tfvars').write_text(rendered_tfvars)
def status(remote: Optional[str]):
    """Information on project status"""
    set_settings_from_remote(remote)
    print(colored(ascii_art_logo, 'cyan'))
    # Home must exist before any other check makes sense.
    if not Settings.typhoon_home:
        print(colored(f'FATAL: typhoon home not found...', 'red'))
        return
    else:
        print(colored('• Typhoon home defined as', 'green'),
              colored(Settings.typhoon_home, 'blue'))
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    if metadata_store.exists():
        print(colored('• Metadata database found in', 'green'),
              colored(Settings.metadata_db_url, 'blue'))
        check_connections_yaml(remote)
        check_connections_dags(remote)
        check_variables_dags(remote)
    elif isinstance(metadata_store, SQLiteMetadataStore):
        # SQLite stores are created lazily, so a missing one is only a warning.
        print(colored('• Metadata store not found for', 'yellow'),
              colored(Settings.metadata_db_url, 'blue'))
        print(
            colored(
                ' - It will be created upon use, or create by running (idempotent) command',
                color=None),
            colored(f'typhoon migrate{" " + remote if remote else ""}', 'blue'))
        print(colored(' Skipping connections and variables checks...', 'red'))
    else:
        print(colored('• Metadata store not found or incomplete for', 'red'),
              colored(Settings.metadata_db_url, 'blue'))
        print(
            colored(' - Fix by running (idempotent) command', color=None),
            colored(
                f'typhoon metadata migrate{" " + remote if remote else ""}',
                'blue'))
        print(colored(' Skipping connections and variables checks...', 'red'))
    if not remote:
        # Local: compare built DAGs against their sources.
        changed_dags = dags_with_changes()
        if changed_dags:
            print(
                colored('• Unbuilt changes in DAGs... To rebuild run',
                        'yellow'),
                colored(
                    f'typhoon dag build{" " + remote if remote else ""} --all [--debug]',
                    'blue'))
            for dag in changed_dags:
                print(colored(f' - {dag}', 'blue'))
        else:
            print(colored('• DAGs up to date', 'green'))
    else:
        # Remote: compare deployed DAGs against local sources.
        undeployed_dags = dags_without_deploy(remote)
        if undeployed_dags:
            print(
                # NOTE(review): the extracted source breaks mid-string here;
                # '\n' reconstructs the apparent line break — confirm against
                # the repository.
                colored('• Undeployed changes in DAGs... \nTo deploy run',
                        'yellow'),
                colored(
                    f'typhoon dag push {remote} --all [--build-dependencies]',
                    'blue'))
            for dag in undeployed_dags:
                print(colored(f' - {dag}', 'blue'))
        else:
            print(colored('• DAGs up to date', 'green'))
def remove_connection(remote: Optional[str], conn_id: str):
    """Remove connection from the metadata store"""
    set_settings_from_remote(remote)
    store = Settings.metadata_store(Remotes.aws_profile(remote))
    store.delete_connection(conn_id)
    print(f'Connection {conn_id} deleted')
def test_set_and_get_variable(cfg_path):
    """A stored variable can be fetched back by its id."""
    store = Settings.metadata_store()
    store.set_variable(sample_var)
    fetched = store.get_variable(sample_var.id)
    assert fetched == sample_var
def test_set_and_get_connection(cfg_path):
    """A stored connection can be fetched back by its id."""
    store = Settings.metadata_store()
    store.set_connection(sample_conn)
    fetched = store.get_connection(sample_conn.conn_id)
    assert fetched == sample_conn
def migrate(remote: str):
    """Create the necessary metadata tables"""
    set_settings_from_remote(remote)
    print(f'Migrating {Settings.metadata_db_url}...')
    store = Settings.metadata_store(aws_profile=Remotes.aws_profile(remote))
    store.migrate()
def get_variables():
    # NOTE(review): despite the name, this returns CONNECTIONS under the key
    # 'connections' — presumably a template-context helper. Confirm against
    # callers whether the name/content mismatch is intentional before renaming.
    return {'connections': Settings.metadata_store().get_connections()}
def remove_variable(remote: Optional[str], var_id: str):
    """Remove variable from the metadata store"""
    # (Docstring previously said "Remove connection" — copy-paste error.)
    set_settings_from_remote(remote)
    metadata_store = Settings.metadata_store(Remotes.aws_profile(remote))
    metadata_store.delete_variable(var_id)
    print(f'Variable {var_id} deleted')
def get_variable(variable_id: str) -> Variable:
    """Resolve a variable, letting a TYPHOON_VARIABLE_<id> environment
    variable override the metadata store (override is always a STRING)."""
    override = os.environ.get(f'TYPHOON_VARIABLE_{variable_id}')
    if override:
        return Variable(variable_id, VariableType.STRING, override)
    return Settings.metadata_store().get_variable(variable_id)