def get_dag_dependencies(cls, session: Session = None) -> Dict[str, List['DagDependency']]:
    """
    Get the dependencies between DAGs

    :param session: ORM Session
    :type session: Session
    """
    dependencies = {}
    if session.bind.dialect.name in ["sqlite", "mysql"]:
        # SQLite and MySQL return the extracted JSON as a string, so it
        # must be parsed with json.loads below.
        for row in session.query(
                cls.dag_id,
                func.json_extract(cls.data, "$.dag.dag_dependencies")).all():
            dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])]
    elif session.bind.dialect.name == "mssql":
        # MSSQL uses JSON_QUERY and likewise returns a string.
        for row in session.query(
                cls.dag_id,
                func.json_query(cls.data, "$.dag.dag_dependencies")).all():
            dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])]
    else:
        # PostgreSQL's json_extract_path already yields decoded Python
        # objects, so the rows can be consumed directly.
        for row in session.query(
                cls.dag_id,
                func.json_extract_path(cls.data, "dag", "dag_dependencies")).all():
            dependencies[row[0]] = [DagDependency(**d) for d in row[1]]
    return dependencies
def get_dag_dependencies(cls, session: Session = None) -> Dict[str, List['DagDependency']]:
    """
    Get the dependencies between DAGs

    :param session: ORM Session
    :type session: Session
    """
    if session.bind.dialect.name in ["sqlite", "mysql"]:
        query = session.query(
            cls.dag_id,
            func.json_extract(cls.data, "$.dag.dag_dependencies"))
        iterator = ((dag_id, json.loads(deps_data) if deps_data else [])
                    for dag_id, deps_data in query)
    elif session.bind.dialect.name == "mssql":
        query = session.query(
            cls.dag_id,
            func.json_query(cls.data, "$.dag.dag_dependencies"))
        iterator = ((dag_id, json.loads(deps_data) if deps_data else [])
                    for dag_id, deps_data in query)
    else:
        iterator = session.query(
            cls.dag_id,
            func.json_extract_path(cls.data, "dag", "dag_dependencies"))

    return {
        dag_id: [DagDependency(**d) for d in (deps_data or [])]
        for dag_id, deps_data in iterator
    }
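# A minimal usage sketch for the method above, assuming it is a classmethod
# on a SQLAlchemy model such as SerializedDagModel (the class name is an
# assumption; the snippets omit it) and that the session comes from
# Airflow's create_session helper.
from airflow.utils.session import create_session

with create_session() as session:
    dependencies = SerializedDagModel.get_dag_dependencies(session=session)
    for dag_id, deps in dependencies.items():
        print(dag_id, deps)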
def execute(self, context: "Context") -> Optional[List[Any]]:
    with PsrpHook(
        self.conn_id,
        logging_level=self.logging_level,
        runspace_options=self.runspace_options,
        wsman_options=self.wsman_options,
        on_output_callback=self.log.info if not self.do_xcom_push else None,
    ) as hook, hook.invoke() as ps:
        if self.psrp_session_init is not None:
            ps.add_command(self.psrp_session_init)
        if self.command:
            ps.add_script(f"cmd.exe /c @'\n{self.command}\n'@")
        else:
            if self.cmdlet:
                ps.add_cmdlet(self.cmdlet)
            else:
                ps.add_script(self.powershell)
            if self.parameters:
                ps.add_parameters(self.parameters)
            if self.do_xcom_push:
                ps.add_cmdlet("ConvertTo-Json")
    if ps.had_errors:
        raise AirflowException("Process failed")

    rc = ps.runspace_pool.host.rc
    if rc:
        raise AirflowException(f"Process exited with non-zero status code: {rc}")

    if not self.do_xcom_push:
        return None

    return [json.loads(output) for output in ps.output]
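# A hedged usage sketch for the execute() method above: instantiating the
# operator in a DAG. The keyword arguments mirror the instance attributes
# execute() reads (conn_id, cmdlet, parameters, do_xcom_push); the exact
# constructor parameter names in a given provider release may differ.
get_spooler = PsrpOperator(
    task_id="get_spooler_service",
    conn_id="psrp_default",
    cmdlet="Get-Service",
    parameters={"Name": "Spooler"},
    do_xcom_push=True,
)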
def data(self):
    # Cache the decoded payload so repeated accesses avoid
    # zlib.decompress + json.loads. A single-underscore name is used so
    # the attribute check works inside a class (a double-underscore name
    # would be mangled and never match a hasattr/getattr lookup by its
    # literal name).
    if getattr(self, "_data_cache", None) is None:
        if self._data_compressed:
            self._data_cache = json.loads(zlib.decompress(self._data_compressed))
        else:
            self._data_cache = self._data
    return self._data_cache
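# For context, a sketch of the inverse write path implied by the property
# above: _data_compressed would be produced by zlib-compressing the JSON
# serialization of the payload (assumed; the writer is not shown here).
import json
import zlib

payload = {"dag": {"dag_id": "example"}}
compressed = zlib.compress(json.dumps(payload).encode("utf-8"))
assert json.loads(zlib.decompress(compressed)) == payload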
def load_dag_schema_dict() -> dict:
    """Load & return Json Schema for DAG as Python dict"""
    schema_file_name = 'schema.json'
    schema_file = pkgutil.get_data(__name__, schema_file_name)

    if schema_file is None:
        raise AirflowException("Schema file {} does not exist".format(schema_file_name))

    schema = json.loads(schema_file.decode())
    return schema
def validate_schema(cls, serialized_obj: Union[str, dict]) -> None:
    """Validate serialized_obj satisfies JSON schema."""
    if cls._json_schema is None:
        raise AirflowException('JSON schema of {:s} is not set.'.format(cls.__name__))

    if isinstance(serialized_obj, dict):
        cls._json_schema.validate(serialized_obj)
    elif isinstance(serialized_obj, str):
        cls._json_schema.validate(json.loads(serialized_obj))
    else:
        raise TypeError("Invalid type: Only dict and str are supported.")
def load_dag_schema() -> Validator:
    """Load Json Schema for DAG"""
    schema_file_name = 'schema.json'
    schema_file = pkgutil.get_data(__name__, schema_file_name)

    if schema_file is None:
        raise AirflowException("Schema file {} does not exist".format(schema_file_name))

    schema = json.loads(schema_file.decode())
    jsonschema.Draft7Validator.check_schema(schema)
    return jsonschema.Draft7Validator(schema)
def from_json(cls, serialized_obj: str) -> Union['BaseSerialization', dict, list, set, tuple]:
    """Deserializes serialized_obj and reconstructs all DAGs and operators it contains."""
    return cls.from_dict(json.loads(serialized_obj))
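# Roundtrip sketch tying the helpers above together, assuming they are
# classmethods of a SerializedDAG-style class that also exposes to_dict()
# (an assumption; only validate_schema and from_json appear above):
serialized = SerializedDAG.to_dict(dag)       # dag: an airflow.models.DAG instance
SerializedDAG.validate_schema(serialized)     # checks against the loaded JSON schema
restored = SerializedDAG.from_json(json.dumps(serialized))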
import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.settings import json
from airflow_dbt.operators.dbt_operator import (
    DbtRunOperator,
    DbtTestOperator,
)

from common.operators.covid19_to_ingestions import Covid19ToIngestions

default_args = json.loads(Variable.get('covid19'))
# The Variable stores retry_delay as a number of minutes; convert it to the
# timedelta that Airflow expects.
default_args.update({"retry_delay": timedelta(minutes=default_args["retry_delay"])})

dbt_dir = os.environ["DBT_DIR"]
dbt_profiles_dir = os.environ["DBT_PROFILES_DIR"]

with DAG(
        'covid19_dbt',
        default_args=default_args,
        description='Managing dbt data pipeline',
        schedule_interval='@daily') as dag:

    ingest_covid19_day_task = Covid19ToIngestions(task_id='ingest_covid19_day_to_dbt', dag=dag)

    dbt_run = DbtRunOperator(
        task_id='dbt_run',
        dir=dbt_dir,
        profiles_dir=dbt_profiles_dir,
    )
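# Example shape of the 'covid19' Airflow Variable consumed above
# (hypothetical values): retry_delay is stored as a plain number of minutes
# and converted to a timedelta at parse time.
#
#   {"owner": "data-eng", "retries": 2, "retry_delay": 5}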