def validate_dag(mode, dag_data):
    """
    Validate a dag file for compatibility with Cloud Composer.

    Args:
        mode (string): INLINE, GCS or GIT
        dag_data (string): Either a JSON payload containing the DSL dag
            definition (mode==INLINE) or the path to a dag file (mode!=INLINE)

    Returns:
        dict with keys 'is_valid' and 'dag_definition' (the parsed output of
        the validator's inspect_dag()).
    """
    logger.log(logging.DEBUG, f"Validating dag in mode: {mode} with data {dag_data}")
    # INLINE payloads are first materialized into a dag file via the DSL
    # constructor; the other modes already point at a dag file on disk.
    dag_source = (
        __construct_dag_from_dsl(dag_data)['dag_file']
        if mode == "INLINE"
        else dag_data
    )
    validator = dag_validator.DagValidator(dag_source)
    return {
        'is_valid': True,
        'dag_definition': json.loads(validator.inspect_dag()),
    }
def deploy_dag(project_id, mode, bucket_name, dag_data=None, dag_file=None):
    """
    Deploys a dag file into a Cloud Composer environment.

    Args:
        project_id (string): GCP Project Id of the Cloud Composer instance
        mode (string): INLINE, GCS or GIT
        bucket_name (string): The bucket name of the GCS location of the Cloud
            Composer dag files (without the /dags prefix)
        dag_data (string): JSON payload containing the DSL dag definition
            (mode==INLINE)
        dag_file (string): Path to a dag file (mode!=INLINE)

    Returns:
        the url to the GCS bucket (gs:// path) where the dag file was deployed

    Raises:
        ValueError: if the argument required by the given mode was not supplied
            (dag_data for INLINE, dag_file otherwise).
    """
    logger.log(logging.DEBUG, f"Validating dag in mode: {mode}")
    if mode == "INLINE":
        if dag_data is None:
            raise ValueError(
                "INLINE mode has been specified but no dag_data was provided")
        dag = __construct_dag_from_dsl(dag_data)
        # validate the dag so we don't deploy a dag with errors
        dag_validator.DagValidator(dag['dag_file']).validate_dag()
        # upload the DAG and its associated JSON payload
        gcs_upload_file(project_id, bucket_name, "dags/", dag['dag_file'])
        gcs_upload_file(project_id, bucket_name, "dags/", dag['json_file'])
        dag_file = dag['dag_file']
    else:
        if dag_file is None:
            # BUG FIX: previous message hard-coded "GCS mode" even when the
            # caller asked for GIT; report the actual mode instead.
            raise ValueError(
                f"{mode} mode has been specified but no dag_file was provided")
        # validate the dag so we don't deploy a dag with errors
        dag_validator.DagValidator(dag_file).validate_dag()
        # upload the DAG
        gcs_upload_file(project_id, bucket_name, "dags/", dag_file)
    # return the GCS path of the deployed dag file
    return f"gs://{bucket_name}/dags/{os.path.basename(os.path.normpath(dag_file))}"
def test_validate_dag_from_payload_invalid():
    """Each invalid DSL payload must fail dag validation."""
    for payload_file in get_test_files(DIR_DAGS_INVALID, EXT_PAYLOAD):
        payload = json_payload_to_dict(DIR_DAGS_INVALID, payload_file)
        generated = dag_generator.DagGenerator(payload).generate_dag()
        validator = dag_validator.DagValidator(generated['dag_file'])
        with pytest.raises(Exception):
            validator.assert_has_valid_dag()
def test_inspect_dag_from_static():
    """inspect_dag on every static dag file must yield parseable, non-null JSON."""
    base_dir = os.path.dirname(Path(__file__))
    for dag_name in get_test_files(DIR_DAGS_STATIC, EXT_STATIC):
        dag_path = os.path.join(base_dir, DIR_DAGS_STATIC, dag_name)
        inspected = dag_validator.DagValidator(dag_path).inspect_dag()
        assert json.loads(inspected) is not None
def test_inspect_dag_from_payload():
    """inspect_dag on every generated valid dag must yield parseable, non-null JSON."""
    for payload_file in get_test_files(DIR_DAGS_VALID, EXT_PAYLOAD):
        payload = json_payload_to_dict(DIR_DAGS_VALID, payload_file)
        generated = dag_generator.DagGenerator(payload).generate_dag()
        inspected = dag_validator.DagValidator(generated['dag_file']).inspect_dag()
        assert json.loads(inspected) is not None
def test_validate_dag_from_static():
    """Every static dag file must pass validation without raising."""
    base_dir = os.path.dirname(Path(__file__))
    for dag_name in get_test_files(DIR_DAGS_STATIC, EXT_STATIC):
        dag_path = os.path.join(base_dir, DIR_DAGS_STATIC, dag_name)
        assert not dag_validator.DagValidator(dag_path).assert_has_valid_dag()
def test_validate_dag_from_payload():
    """Every generated valid dag must pass validation without raising."""
    for payload_file in get_test_files(DIR_DAGS_VALID, EXT_PAYLOAD):
        payload = json_payload_to_dict(DIR_DAGS_VALID, payload_file)
        generated = dag_generator.DagGenerator(payload).generate_dag()
        validator = dag_validator.DagValidator(generated['dag_file'])
        assert not validator.assert_has_valid_dag()