def test_procedure_tests_sql(self):
    """Check the SQL generated for the ``append_hello`` procedure's tests.

    The first two generated scripts must both define the ``test_procedure``
    and ``append_hello`` procedures; the second must also define the
    ``test_shift_28_bits_one_day`` function.
    """
    procedure_dir = self.udf_dir.parent / "procedure"
    raw_procedure = parse_routine.RawRoutine.from_file(
        procedure_dir / "append_hello" / "stored_procedure.sql"
    )

    # Combine UDFs and stored procedures into a single lookup.
    routines = parse_routine.read_routine_dir(self.udf_dir)
    routines.update(parse_routine.read_routine_dir(procedure_dir))

    tests = parse_routine.routine_tests_sql(
        raw_procedure, routines, self.udf_dir.parent
    )

    procedure_definitions = (
        "CREATE OR REPLACE PROCEDURE\n _generic_dataset_.procedure_test_procedure",
        "CREATE OR REPLACE PROCEDURE\n _generic_dataset_.procedure_append_hello",
    )
    for index in (0, 1):
        generated_sql = tests[index]
        for definition in procedure_definitions:
            assert definition in generated_sql

    assert (
        "CREATE OR REPLACE FUNCTION _generic_dataset_.udf_test_shift_28_bits_one_day"
        in tests[1]
    )
def test_sub_local_routines(self):
    """Check that ``sub_local_routines`` adds definitions for known routines.

    Covers three inputs: a UDF's own test SQL, an ad-hoc query that mixes a
    known UDF with an unknown ``mozfun`` routine, and a stored procedure call.
    """
    project_dir = TEST_DIR / "data" / "test_sql" / "moz-fx-data-test-project"
    routines = parse_routine.read_routine_dir(project_dir / "udf")
    routines.update(parse_routine.read_routine_dir(project_dir / "procedure"))

    udf_file = project_dir / "udf" / "test_shift_28_bits_one_day" / "udf.sql"
    test_sql = parse_routine.RawRoutine.from_file(udf_file).tests[0]

    # Before substitution the raw test SQL carries no temp function definitions.
    assert "CREATE TEMP FUNCTION" not in test_sql
    assert "CREATE TEMP FUNCTION udf_test_bitmask_lowest_28" not in test_sql

    sql_root = self.udf_dir.parent

    substituted = parse_routine.sub_local_routines(test_sql, sql_root, routines)
    assert "CREATE TEMP FUNCTION udf_test_shift_28_bits_one_day" in substituted
    assert "CREATE TEMP FUNCTION udf_test_bitmask_lowest_28" in substituted

    query = "SELECT udf.test_bitmask_lowest_28(23), mozfun.hist.extract('{}')"
    substituted = parse_routine.sub_local_routines(query, sql_root, routines)
    assert "CREATE TEMP FUNCTION udf_test_bitmask_lowest_28" in substituted
    # `routines` holds no definition for hist.extract, so that reference
    # is expected to come through untouched.
    assert "hist_extract" not in substituted
    assert "mozfun.hist.extract" in substituted

    call_statement = "CALL procedure.test_procedure(23);"
    substituted = parse_routine.sub_local_routines(
        call_statement, sql_root, routines, stored_procedure_test=True
    )
    assert (
        "CREATE OR REPLACE PROCEDURE\n _generic_dataset_.procedure_test_procedure"
        in substituted
    )
def validate(project_dirs):
    """Validate UDF docs.

    For every entry of ``project_dirs`` that is an existing directory, the
    routines are read with ``read_routine_dir`` and each file located in a
    directory named ``EXAMPLE_DIR`` is passed through ``sub_local_routines``.
    The resulting SQL is written to a scratch file and checked with
    ``DryRun(...).is_valid()``.

    Entries that are not directories are skipped silently. If at least one
    example is reported invalid, "Invalid examples." is printed and the
    process exits with status 1; otherwise the function returns ``None``.
    """
    is_valid = True

    for project_dir in project_dirs:
        if not os.path.isdir(project_dir):
            continue

        parsed_routines = read_routine_dir(project_dir)

        for root, _dirs, files in os.walk(project_dir):
            if os.path.basename(root) != EXAMPLE_DIR:
                continue

            # The scratch file mirrors the example's directory layout.  An
            # absolute ``root`` has to be made relative first: joining an
            # absolute path onto another path discards the left-hand side,
            # which would cause the original example file to be overwritten.
            relative_root = Path(root)
            if relative_root.is_absolute():
                relative_root = relative_root.relative_to(relative_root.anchor)

            for file in files:
                dry_run_sql = sub_local_routines(
                    (Path(root) / file).read_text(),
                    project_dir,
                    parsed_routines,
                )

                # store sql in temporary file for dry_run; the context manager
                # removes the scratch directory once the check has finished
                with tempfile.TemporaryDirectory() as scratch_dir:
                    tmp_dir = Path(scratch_dir) / relative_root
                    tmp_dir.mkdir(parents=True, exist_ok=True)
                    tmp_example_file = tmp_dir / file
                    tmp_example_file.write_text(dry_run_sql)

                    if not DryRun(str(tmp_example_file)).is_valid():
                        is_valid = False

    if not is_valid:
        print("Invalid examples.")
        sys.exit(1)
def publish(target, project_id, dependency_dir, gcs_bucket, gcs_path, public):
    """Publish routines in the provided directory.

    When ``dependency_dir`` is set and exists, it is first handed to
    ``push_dependencies_to_gcs``. Each routine read from ``target`` is then
    published together with its accumulated dependencies. A routine is
    published at most once, and routines whose ``filepath`` is listed in
    ``SKIP`` are left out.
    """
    client = bigquery.Client(project_id)

    if dependency_dir and os.path.exists(dependency_dir):
        push_dependencies_to_gcs(
            gcs_bucket, gcs_path, dependency_dir, os.path.basename(target)
        )

    raw_routines = read_routine_dir(target)
    already_published = []

    for routine_name in raw_routines:
        # get all dependencies for UDF and publish as persistent UDF
        to_publish = accumulate_dependencies([], raw_routines, routine_name)
        to_publish.append(routine_name)

        for dependency in to_publish:
            if dependency in already_published:
                continue
            if raw_routines[dependency].filepath in SKIP:
                continue

            publish_routine(
                raw_routines[dependency],
                client,
                project_id,
                gcs_bucket,
                gcs_path,
                raw_routines.keys(),
                public,
            )
            already_published.append(dependency)
def test_read_routine_dir(self):
    """Check that ``read_routine_dir`` returns the expected routines.

    The result must contain the three test UDFs by qualified name, and the
    entry for ``udf.test_shift_28_bits_one_day`` must be a ``RawRoutine``
    whose ``name`` matches its key.
    """
    raw_routines = parse_routine.read_routine_dir(self.udf_dir)

    # A bare `assert "<non-empty string>"` can never fail, so each name is
    # checked for membership explicitly.
    assert "udf.test_shift_28_bits_one_day" in raw_routines
    assert "udf.test_safe_crc32_uuid" in raw_routines
    assert "udf.test_safe_sample_id" in raw_routines

    shift_routine = raw_routines["udf.test_shift_28_bits_one_day"]
    assert shift_routine.name == "udf.test_shift_28_bits_one_day"
    assert isinstance(shift_routine, parse_routine.RawRoutine)
def test_accumulate_dependencies(self):
    """Check which routine names ``accumulate_dependencies`` returns.

    For ``test_shift_28_bits_one_day`` the result must contain both that
    routine and ``test_bitmask_lowest_28``; for ``test_bitmask_lowest_28``
    the result must contain the routine itself.
    """
    routines = parse_routine.read_routine_dir(self.udf_dir)

    shift_dependencies = parse_routine.accumulate_dependencies(
        [], routines, "udf.test_shift_28_bits_one_day"
    )
    for expected_name in (
        "udf.test_shift_28_bits_one_day",
        "udf.test_bitmask_lowest_28",
    ):
        assert expected_name in shift_dependencies

    bitmask_dependencies = parse_routine.accumulate_dependencies(
        [], routines, "udf.test_bitmask_lowest_28"
    )
    assert "udf.test_bitmask_lowest_28" in bitmask_dependencies
def test_routine_tests_sql(self):
    """Check the test SQL that ``routine_tests_sql`` generates for UDFs.

    The first test generated for ``test_shift_28_bits_one_day`` must define
    both that UDF and ``test_bitmask_lowest_28`` as temp functions, while
    ``test_bitmask_lowest_28`` yields an empty list of tests.
    """
    routines = parse_routine.read_routine_dir(self.udf_dir)
    sql_root = self.udf_dir.parent

    shift_routine = parse_routine.RawRoutine.from_file(
        self.udf_dir / "test_shift_28_bits_one_day" / "udf.sql"
    )
    shift_tests = parse_routine.routine_tests_sql(shift_routine, routines, sql_root)
    first_test = shift_tests[0]
    assert "CREATE TEMP FUNCTION udf_test_shift_28_bits_one_day" in first_test
    assert "CREATE TEMP FUNCTION udf_test_bitmask_lowest_28" in first_test

    bitmask_routine = parse_routine.RawRoutine.from_file(
        self.udf_dir / "test_bitmask_lowest_28" / "udf.sql"
    )
    bitmask_tests = parse_routine.routine_tests_sql(
        bitmask_routine, routines, sql_root
    )
    assert bitmask_tests == []
def test_routine_usage_definitions(self):
    """Check the definitions ``routine_usage_definitions`` returns for a query.

    The query references two test UDFs. The result must have 11 entries and
    contain the ``CREATE OR REPLACE FUNCTION`` statement for each of them.
    """
    routines = parse_routine.read_routine_dir(self.udf_dir)
    query = "SELECT udf.test_bitmask_lowest_28(0), udf.test_safe_sample_id('')"

    definitions = parse_routine.routine_usage_definitions(
        query, self.udf_dir.parent, routines
    )
    assert len(definitions) == 11

    bitmask_definition = (
        "CREATE OR REPLACE FUNCTION udf.test_bitmask_lowest_28()"
        " AS (\n 0x0FFFFFFF\n);"
    )
    assert bitmask_definition in definitions

    sample_id_definition = (
        "CREATE OR REPLACE FUNCTION udf.test_safe_sample_id(client_id STRING) AS"
        " (\n MOD(udf.test_safe_crc32_uuid(CAST(client_id AS BYTES)), 100)\n);"
    )
    assert sample_id_definition in definitions
def udfs():
    """Get all udfs and assertions.

    Returns whatever ``read_routine_dir`` produces for the ``tests/assert``,
    ``udf`` and ``udf_js`` directories.
    """
    routine_dirs = ("tests/assert", "udf", "udf_js")
    return read_routine_dir(*routine_dirs)