def test_is_valid_label(self):
    """Check which strings Metadata.is_valid_label accepts and which it rejects."""
    accepted = (
        "valid_label",
        "valid-label1",
        "1231",
        "1231-21",
        "a" * 63,  # 63 characters is still expected to pass
    )
    rejected = (
        "låbel",
        "a" * 64,  # one character longer than the accepted case above
        "INVALID",
        "invalid.label",
        "",
    )

    for label in accepted:
        assert Metadata.is_valid_label(label)

    # Rejections must be the literal False, not merely a falsy value.
    for label in rejected:
        assert Metadata.is_valid_label(label) is False
 def test_of_sql_file_no_metadata(self):
     """Expect FileNotFoundError from Metadata.of_sql_file for no_metadata_query_v1."""
     query_dir = TEST_DIR / "data" / "test_sql" / "test" / "no_metadata_query_v1"
     sql_path = query_dir / "query.sql"

     with pytest.raises(FileNotFoundError):
         Metadata.of_sql_file(sql_path)
    def test_dags_with_tasks(self):
        """Attach one task to a one-DAG collection and check it is listed under that DAG."""
        sql_dir = TEST_DIR / "data" / "test_sql" / "test"
        query_file = sql_dir / "incremental_query_v1" / "query.sql"

        task_config = {
            "dag_name": "test_dag",
            "depends_on_past": True,
            "param": "test_param",
        }
        metadata = Metadata("test", "test", {}, task_config)

        tasks = [Task(query_file, metadata)]

        dag_config = {"test_dag": {"schedule_interval": "daily", "default_args": {}}}
        dags = DagCollection.from_dict(dag_config).with_tasks(tasks)

        assert len(dags.dags) == 1

        test_dag = dags.dag_by_name("test_dag")
        assert len(test_dag.tasks) == 1
        assert test_dag.tasks[0].dag_name == "test_dag"
Пример #4
0
def main():
    """
    Publish query data as JSON to GCS.

    Parses the command line, loads the metadata of the given query file and,
    only if that metadata is flagged as public JSON, runs a JsonPublisher.
    Returns without publishing when no metadata file exists for the query.
    """
    # Arguments the parser does not recognise are accepted but not used here.
    args, _remaining_arguments = parser.parse_known_args()
    query_file = args.query_file

    try:
        metadata = Metadata.of_sql_file(query_file)
    except FileNotFoundError:
        print("No metadata file for: {}".format(query_file))
        return

    # Only queries marked as public JSON get exported.
    if metadata.is_public_json():
        storage_client = storage.Client()
        bigquery_client = bigquery.Client(args.project_id)

        JsonPublisher(
            bigquery_client,
            storage_client,
            args.project_id,
            query_file,
            args.api_version,
            args.target_bucket,
            args.parameter,
        ).publish_json()
    def test_of_table(self):
        """Load metadata through Metadata.of_table and check its field values."""
        sql_root = TEST_DIR / "data" / "test_sql"
        metadata = Metadata.of_table("test", "non_incremental_query", "v1", sql_root)

        # Friendly name and description are expected to carry the same text.
        expected_text = "Test table for a non-incremental query"
        assert metadata.friendly_name == expected_text
        assert metadata.description == expected_text
        assert metadata.review_bug() == "1999999"
Пример #6
0
    def of_query(cls, query_file):
        """
        Create task that schedules the corresponding query in Airflow.

        The metadata is loaded with Metadata.of_sql_file and passed, together
        with the query file, to the class constructor.

        Raises FileNotFoundError if no metadata file exists for the query.
        """
        return cls(query_file, Metadata.of_sql_file(query_file))
Пример #7
0
    def test_no_dag_name(self):
        """Expect TaskParseException when the metadata's last argument has no "dag_name"."""
        sql_dir = TEST_DIR / "data" / "test_sql" / "test"
        query_file = sql_dir / "incremental_query_v1" / "query.sql"

        # The fourth argument is non-empty but carries no "dag_name" key.
        metadata_without_dag = Metadata("test", "test", {}, {"foo": "bar"})

        with pytest.raises(TaskParseException):
            Task(query_file, metadata_without_dag)
Пример #8
0
    def test_unscheduled_task(self):
        """Expect UnscheduledTask when the metadata's last argument is an empty dict."""
        sql_dir = TEST_DIR / "data" / "test_sql" / "test"
        query_file = sql_dir / "incremental_query_v1" / "query.sql"

        empty_metadata = Metadata("test", "test", {}, {})

        with pytest.raises(UnscheduledTask):
            Task(query_file, empty_metadata)
    def test_of_sql_file(self):
        """Load metadata through Metadata.of_sql_file and check its field values."""
        query_dir = TEST_DIR / "data" / "test_sql" / "test" / "non_incremental_query_v1"
        metadata = Metadata.of_sql_file(query_dir / "query.sql")

        # Friendly name and description are expected to carry the same text.
        expected_text = "Test table for a non-incremental query"
        assert metadata.friendly_name == expected_text
        assert metadata.description == expected_text
        assert metadata.review_bug() == "1999999"
Пример #10
0
    def __init__(
        self,
        client,
        storage_client,
        project_id,
        query_file,
        api_version,
        target_bucket,
        parameter=None,
    ):
        """
        Init JsonPublisher.

        Stores the given clients and settings, loads the query's metadata with
        Metadata.of_sql_file, picks up an export date from `parameter` and
        derives dataset, table and version from the query file path.

        Arguments:
        client -- stored as self.client
        storage_client -- stored as self.storage_client
        project_id -- stored as self.project_id
        query_file -- path of the query; must match QUERY_FILE_RE
        api_version -- stored as self.api_version
        target_bucket -- stored as self.target_bucket
        parameter -- optional iterable of strings searched for a date with
                     SUBMISSION_DATE_RE

        Logs an error and exits the process with status 1 if `query_file`
        does not match QUERY_FILE_RE. Exceptions raised by
        Metadata.of_sql_file are not handled here.
        """
        self.project_id = project_id
        self.query_file = query_file
        self.api_version = api_version
        self.target_bucket = target_bucket
        self.parameter = parameter
        self.client = client
        self.storage_client = storage_client
        self.temp_table = None
        self.date = None
        self.stage_gcs_path = "stage/json/"

        self.metadata = Metadata.of_sql_file(self.query_file)

        # only for incremental exports files are written into separate directories
        # for each date, ignore date parameters for non-incremental exports
        if self.metadata.is_incremental_export() and self.parameter:
            for p in self.parameter:
                date_search = re.search(SUBMISSION_DATE_RE, p)

                # No break: if several parameters match, the last one wins.
                if date_search:
                    self.date = date_search.group(1)

        query_file_re = re.search(QUERY_FILE_RE, self.query_file)
        if query_file_re:
            self.dataset = query_file_re.group(1)
            self.table = query_file_re.group(2)
            self.version = query_file_re.group(3)
        else:
            # logging interpolates with %-style placeholders; a "{}" placeholder
            # combined with an argument fails to format when the record is emitted.
            logging.error("Invalid file naming format: %s", self.query_file)
            sys.exit(1)
Пример #11
0
    def test_task_instantiation(self):
        """Build a Task from metadata and check its dag_name and args."""
        sql_dir = TEST_DIR / "data" / "test_sql" / "test"
        query_file = sql_dir / "incremental_query_v1" / "query.sql"

        task_config = {
            "dag_name": "test_dag",
            "depends_on_past": True,
            "param": "test_param",
        }
        metadata = Metadata("test", "test", {}, task_config)

        task = Task(query_file, metadata)

        assert task.dag_name == "test_dag"
        assert task.args["depends_on_past"]
        assert task.args["param"] == "test_param"
Пример #12
0
    def test_from_file(self):
        """Load data/metadata.yaml with Metadata.from_file and check fields and labels."""
        metadata = Metadata.from_file(TEST_DIR / "data" / "metadata.yaml")

        assert metadata.friendly_name == "Test metadata file"
        assert metadata.description is None

        # Labels that must be present with an exact value.
        for key, value in (("schedule", "daily"), ("public_json", "")):
            assert key in metadata.labels
            assert metadata.labels[key] == value

        assert metadata.is_public_json()
        assert metadata.is_incremental()
        assert metadata.is_incremental_export()
        assert metadata.review_bug() is None

        # Labels that are expected to be missing after loading.
        for key in ("invalid_value", "invalid.label"):
            assert key not in metadata.labels

        for key in ("1232341234", "1234_abcd"):
            assert key in metadata.labels

        # Label values are compared as strings, including the all-digit one.
        for key, value in (("number_value", "1234234"), ("number_string", "1234abcde")):
            assert key in metadata.labels
            assert metadata.labels[key] == value

        assert "123-432" in metadata.labels
Пример #13
0
    def test_dags_with_invalid_tasks(self):
        """Expect InvalidDag when a task names a DAG missing from the collection."""
        # All setup stays inside the raises block, so an InvalidDag raised by
        # any of these steps satisfies the expectation.
        with pytest.raises(InvalidDag):
            sql_dir = TEST_DIR / "data" / "test_sql" / "test"
            query_file = sql_dir / "incremental_query_v1" / "query.sql"

            task_config = {
                "dag_name": "non_exisiting_dag",
                "depends_on_past": True,
                "param": "test_param",
            }
            metadata = Metadata("test", "test", {}, task_config)

            tasks = [Task(query_file, metadata)]

            # The collection only defines "test_dag".
            dag_config = {"test_dag": {"schedule_interval": "daily", "default_args": {}}}
            DagCollection.from_dict(dag_config).with_tasks(tasks)
Пример #14
0
 def test_of_non_existing_table(self):
     """Expect FileNotFoundError from Metadata.of_table for the "no_metadata" table."""
     table_ref = ("test", "no_metadata", "v1")

     with pytest.raises(FileNotFoundError):
         Metadata.of_table(*table_ref, TEST_DIR / "data" / "test_sql")
Пример #15
0
 def test_non_existing_file(self):
     """Expect FileNotFoundError from Metadata.from_file for a path under nonexisting_dir."""
     missing_path = TEST_DIR / "nonexisting_dir" / "metadata.yaml"

     with pytest.raises(FileNotFoundError):
         Metadata.from_file(missing_path)