def test_is_valid_label(self):
    """Check which strings ``Metadata.is_valid_label`` accepts and rejects."""
    accepted = (
        "valid_label",
        "valid-label1",
        "1231",
        "1231-21",
        "a" * 63,
    )
    rejected = (
        "låbel",
        "a" * 64,
        "INVALID",
        "invalid.label",
        "",
    )

    for label in accepted:
        assert Metadata.is_valid_label(label)

    # Rejections are compared against the ``False`` singleton, not mere falsiness.
    for label in rejected:
        assert Metadata.is_valid_label(label) is False
def test_of_sql_file_no_metadata(self):
    """Expect ``FileNotFoundError`` for the ``no_metadata_query_v1`` fixture query."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "no_metadata_query_v1"
    sql_path = query_dir / "query.sql"

    with pytest.raises(FileNotFoundError):
        Metadata.of_sql_file(sql_path)
def test_dags_with_tasks(self):
    """A task scheduled for ``test_dag`` shows up in that DAG of the collection."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "incremental_query_v1"
    sql_path = query_dir / "query.sql"

    scheduling = {
        "dag_name": "test_dag",
        "depends_on_past": True,
        "param": "test_param",
    }
    task = Task(sql_path, Metadata("test", "test", {}, scheduling))

    dag_config = {"test_dag": {"schedule_interval": "daily", "default_args": {}}}
    collection = DagCollection.from_dict(dag_config).with_tasks([task])

    assert len(collection.dags) == 1

    test_dag = collection.dag_by_name("test_dag")
    assert len(test_dag.tasks) == 1
    assert test_dag.tasks[0].dag_name == "test_dag"
def main():
    """Publish query data as JSON to GCS."""
    # Arguments the parser does not recognise are collected but not used here.
    args, _unused_query_arguments = parser.parse_known_args()

    try:
        metadata = Metadata.of_sql_file(args.query_file)
    except FileNotFoundError:
        print("No metadata file for: {}".format(args.query_file))
        return

    # Only queries whose metadata marks them as public JSON are published.
    if metadata.is_public_json():
        storage_client = storage.Client()
        bigquery_client = bigquery.Client(args.project_id)

        JsonPublisher(
            bigquery_client,
            storage_client,
            args.project_id,
            args.query_file,
            args.api_version,
            args.target_bucket,
            args.parameter,
        ).publish_json()
def test_of_table(self):
    """``Metadata.of_table`` loads the ``non_incremental_query`` v1 fixture metadata."""
    sql_root = TEST_DIR / "data" / "test_sql"
    table_metadata = Metadata.of_table("test", "non_incremental_query", "v1", sql_root)

    expected_text = "Test table for a non-incremental query"
    assert table_metadata.friendly_name == expected_text
    assert table_metadata.description == expected_text
    assert table_metadata.review_bug() == "1999999"
def of_query(cls, query_file):
    """
    Build an instance of ``cls`` from a query file and its metadata.

    The metadata is looked up with ``Metadata.of_sql_file``; a
    FileNotFoundError raised by that lookup is not caught here and
    propagates to the caller.
    """
    return cls(query_file, Metadata.of_sql_file(query_file))
def test_no_dag_name(self):
    """Scheduling info without a ``dag_name`` entry raises ``TaskParseException``."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "incremental_query_v1"
    sql_path = query_dir / "query.sql"

    scheduling_without_dag = {"foo": "bar"}
    task_metadata = Metadata("test", "test", {}, scheduling_without_dag)

    with pytest.raises(TaskParseException):
        Task(sql_path, task_metadata)
def test_unscheduled_task(self):
    """Empty scheduling info raises ``UnscheduledTask`` when building a ``Task``."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "incremental_query_v1"
    sql_path = query_dir / "query.sql"

    no_scheduling = {}
    task_metadata = Metadata("test", "test", {}, no_scheduling)

    with pytest.raises(UnscheduledTask):
        Task(sql_path, task_metadata)
def test_of_sql_file(self):
    """``Metadata.of_sql_file`` loads metadata for ``non_incremental_query_v1``."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "non_incremental_query_v1"
    loaded = Metadata.of_sql_file(query_dir / "query.sql")

    expected_text = "Test table for a non-incremental query"
    assert loaded.friendly_name == expected_text
    assert loaded.description == expected_text
    assert loaded.review_bug() == "1999999"
def __init__(
    self,
    client,
    storage_client,
    project_id,
    query_file,
    api_version,
    target_bucket,
    parameter=None,
):
    """Init JsonPublisher.

    Stores the given clients and settings, loads the query's metadata with
    ``Metadata.of_sql_file`` and derives ``dataset``, ``table`` and
    ``version`` from the query file path.

    Args:
        client: Stored as ``self.client``.
        storage_client: Stored as ``self.storage_client``.
        project_id: Stored as ``self.project_id``.
        query_file: Path of the query file; it is matched against
            ``QUERY_FILE_RE`` with ``re.search``, so it must be a string.
        api_version: Stored as ``self.api_version``.
        target_bucket: Stored as ``self.target_bucket``.
        parameter: Optional iterable of parameter strings. For incremental
            exports each one is searched with ``SUBMISSION_DATE_RE`` and
            the last match sets ``self.date``.

    Raises:
        SystemExit: With status 1 when ``query_file`` does not match
            ``QUERY_FILE_RE``.
        Exception: Whatever ``Metadata.of_sql_file`` raises is propagated.
    """
    self.project_id = project_id
    self.query_file = query_file
    self.api_version = api_version
    self.target_bucket = target_bucket
    self.parameter = parameter
    self.client = client
    self.storage_client = storage_client
    self.temp_table = None
    self.date = None
    self.stage_gcs_path = "stage/json/"

    self.metadata = Metadata.of_sql_file(self.query_file)

    # only for incremental exports files are written into separate directories
    # for each date, ignore date parameters for non-incremental exports
    if self.metadata.is_incremental_export() and self.parameter:
        for p in self.parameter:
            date_search = re.search(SUBMISSION_DATE_RE, p)

            if date_search:
                self.date = date_search.group(1)

    query_file_re = re.search(QUERY_FILE_RE, self.query_file)
    if query_file_re:
        self.dataset = query_file_re.group(1)
        self.table = query_file_re.group(2)
        self.version = query_file_re.group(3)
    else:
        # logging formats lazily with ``msg % args``; a ``{}`` placeholder is
        # never substituted and makes the handler report a formatting error.
        logging.error("Invalid file naming format: %s", self.query_file)
        sys.exit(1)
def test_task_instantiation(self):
    """A ``Task`` exposes the DAG name and scheduling arguments from its metadata."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "incremental_query_v1"
    sql_path = query_dir / "query.sql"

    scheduling = {
        "dag_name": "test_dag",
        "depends_on_past": True,
        "param": "test_param",
    }
    created = Task(sql_path, Metadata("test", "test", {}, scheduling))

    assert created.dag_name == "test_dag"
    assert created.args["depends_on_past"]
    assert created.args["param"] == "test_param"
def test_from_file(self):
    """Load ``data/metadata.yaml`` and check the parsed fields, flags and labels."""
    parsed = Metadata.from_file(TEST_DIR / "data" / "metadata.yaml")

    assert parsed.friendly_name == "Test metadata file"
    assert parsed.description is None

    # Labels that must be present together with their exact string values.
    expected_values = {"schedule": "daily", "public_json": ""}
    for key, value in expected_values.items():
        assert key in parsed.labels
        assert parsed.labels[key] == value

    assert parsed.is_public_json()
    assert parsed.is_incremental()
    assert parsed.is_incremental_export()
    assert parsed.review_bug() is None

    # These keys are expected to be missing from the loaded labels.
    for absent_key in ("invalid_value", "invalid.label"):
        assert absent_key not in parsed.labels

    for present_key in ("1232341234", "1234_abcd"):
        assert present_key in parsed.labels

    # The values asserted here are strings, including the all-digit one.
    stringified = {"number_value": "1234234", "number_string": "1234abcde"}
    for key, value in stringified.items():
        assert key in parsed.labels
        assert parsed.labels[key] == value

    assert "123-432" in parsed.labels
def test_dags_with_invalid_tasks(self):
    """Expect ``InvalidDag`` when a task names a DAG missing from the collection."""
    query_dir = TEST_DIR / "data" / "test_sql" / "test" / "incremental_query_v1"
    sql_path = query_dir / "query.sql"
    dag_config = {"test_dag": {"schedule_interval": "daily", "default_args": {}}}

    # Metadata, Task and collection construction all stay inside the context
    # manager, so an InvalidDag raised by any of them satisfies the test.
    with pytest.raises(InvalidDag):
        scheduling = {
            "dag_name": "non_exisiting_dag",
            "depends_on_past": True,
            "param": "test_param",
        }
        task = Task(sql_path, Metadata("test", "test", {}, scheduling))

        DagCollection.from_dict(dag_config).with_tasks([task])
def test_of_non_existing_table(self):
    """``Metadata.of_table`` raises ``FileNotFoundError`` for table ``no_metadata``."""
    sql_root = TEST_DIR / "data" / "test_sql"

    with pytest.raises(FileNotFoundError):
        Metadata.of_table("test", "no_metadata", "v1", sql_root)
def test_non_existing_file(self):
    """``Metadata.from_file`` raises ``FileNotFoundError`` for a missing path."""
    missing_path = TEST_DIR / "nonexisting_dir" / "metadata.yaml"

    with pytest.raises(FileNotFoundError):
        Metadata.from_file(missing_path)