Example No. 1
    def run(
        self,
        project: str = None,
        credentials: dict = None,
        credentials_secret: str = None,
        dataset: str = None,
        table: str = None,
        schema: List[bigquery.SchemaField] = None,
    ):
        """
        Run method for this Task.  Invoked by _calling_ this Task within a Flow context, after initialization.

        Args:
            - project (str, optional): the project to initialize the BigQuery Client with; if not provided,
                will default to the one inferred from your credentials
            - credentials (dict, optional): a JSON document containing Google Cloud credentials.
                You should provide these at runtime with an upstream Secret task.
            - credentials_secret (str, optional, DEPRECATED): the name of the Prefect Secret
                containing a JSON representation of your Google Application credentials
            - dataset (str, optional): the name of a dataset in which the table will be created
            - table (str, optional): the name of a table to create
            - schema (List[bigquery.SchemaField], optional): the schema to use when creating the table

        Returns:
            - None

        Raises:
            - SUCCESS: a `SUCCESS` signal if the table already exists
        """
        client = get_client(
            project=project,
            credentials=credentials,
            credentials_secret=credentials_secret,
        )

        try:
            dataset_ref = client.get_dataset(dataset)
        except NotFound:
            self.logger.debug(
                "Dataset {} not found, creating...".format(dataset))
            dataset_ref = client.create_dataset(dataset)

        table_ref = dataset_ref.table(table)
        try:
            client.get_table(table_ref)
            raise SUCCESS("{dataset}.{table} already exists.".format(
                dataset=dataset, table=table))
        except NotFound:
            self.logger.debug("Table {} not found, creating...".format(table))
            table = bigquery.Table(table_ref, schema=schema)

            # partitioning
            if self.time_partitioning:
                table.time_partitioning = self.time_partitioning

            # cluster for optimal data sorting/access
            if self.clustering_fields:
                table.clustering_fields = self.clustering_fields
            client.create_table(table)
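
A minimal usage sketch for this run method, assuming it belongs to the CreateBigQueryTable task from prefect.tasks.gcp.bigquery in Prefect 1.x (the class name is not shown in the excerpt) and that the secret, dataset, table, and schema names below are hypothetical; credentials are supplied at runtime by an upstream PrefectSecret task, as the docstring recommends:

from google.cloud import bigquery
from prefect import Flow
from prefect.tasks.gcp.bigquery import CreateBigQueryTable
from prefect.tasks.secrets import PrefectSecret

create_table = CreateBigQueryTable()

with Flow("create-bq-table") as flow:
    # the secret is expected to hold the service-account JSON as a dict
    creds = PrefectSecret("GCP_CREDENTIALS")
    create_table(
        dataset="my_dataset",
        table="my_table",
        schema=[bigquery.SchemaField("id", "STRING")],
        credentials=creds,
    )

# flow.run()  # the task raises SUCCESS internally if the table already exists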
Example No. 2
    def run(
        self,
        project: str = None,
        credentials_secret: str = None,
        dataset: str = None,
        table: str = None,
        schema: List[bigquery.SchemaField] = None,
    ):
        """
        Run method for this Task.  Invoked by _calling_ this Task within a Flow context, after initialization.

        Args:
            - project (str, optional): the project to initialize the BigQuery Client with; if not provided,
                will default to the one inferred from your credentials
            - credentials_secret (str, optional): the name of the Prefect Secret containing a JSON representation
                of your Google Application credentials; defaults to `"GOOGLE_APPLICATION_CREDENTIALS"`
            - dataset (str, optional): the name of a dataset in which the table will be created
            - table (str, optional): the name of a table to create
            - schema (List[bigquery.SchemaField], optional): the schema to use when creating the table

        Returns:
            - None

        Raises:
            - SUCCESS: a `SUCCESS` signal if the table already exists
        """
        creds = Secret(credentials_secret).get()
        credentials = Credentials.from_service_account_info(creds)
        project = project or credentials.project_id
        client = bigquery.Client(project=project, credentials=credentials)

        try:
            dataset_ref = client.get_dataset(dataset)
        except NotFound:
            self.logger.debug(
                "Dataset {} not found, creating...".format(dataset))
            dataset_ref = client.create_dataset(dataset)

        table_ref = dataset_ref.table(table)
        try:
            client.get_table(table_ref)
            raise SUCCESS("{dataset}.{table} already exists.".format(
                dataset=dataset, table=table))
        except NotFound:
            self.logger.debug("Table {} not found, creating...".format(table))
            table = bigquery.Table(table_ref, schema=schema)

            # partitioning
            if self.time_partitioning:
                table.time_partitioning = self.time_partitioning

            # cluster for optimal data sorting/access
            if self.clustering_fields:
                table.clustering_fields = self.clustering_fields
            client.create_table(table)
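
A sketch of the credentials_secret path used by this variant, under the same CreateBigQueryTable assumption as above; with Prefect 1.x local secrets, the secret can be injected through prefect.context for a local run. The dict below is only a placeholder and will not authenticate against a real project:

import prefect
from prefect import Flow
from prefect.tasks.gcp.bigquery import CreateBigQueryTable

with Flow("create-bq-table-secret") as flow:
    CreateBigQueryTable()(
        dataset="my_dataset",
        table="my_table",
        credentials_secret="GOOGLE_APPLICATION_CREDENTIALS",
    )

# placeholder only; a complete service-account JSON document is required in practice
service_account_info = {"type": "service_account", "project_id": "my-project"}

with prefect.context(secrets={"GOOGLE_APPLICATION_CREDENTIALS": service_account_info}):
    flow.run()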
Example No. 3
import pytest
from prefect.engine.signals import SUCCESS

def test_signals_dont_pass_invalid_arguments_to_states():
    with pytest.raises(TypeError):
        raise SUCCESS(bad_result=100)
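
For contrast, a minimal sketch of valid construction, assuming Prefect 1.x signals (which wrap the corresponding state in a .state attribute): only arguments accepted by the underlying State class are allowed, so a plain message is fine while an unknown keyword such as bad_result raises TypeError.

from prefect.engine.signals import SUCCESS

sig = SUCCESS("table already exists")  # a message is a valid argument
print(type(sig.state).__name__)        # "Success": the wrapped state carried by the signal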
Example No. 4
def succeed():
    raise SUCCESS()
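
A short sketch of what raising SUCCESS buys you inside a Prefect 1.x flow (the task and flow names here are hypothetical): the signal ends the task run immediately and records a Success state, which is how the tasks in the examples above report "already exists" without failing.

from prefect import Flow, task
from prefect.engine.signals import SUCCESS

@task
def maybe_skip_work(already_done: bool):
    if already_done:
        # ends the task here and marks it Successful
        raise SUCCESS("Nothing to do.")
    return "did the work"

with Flow("success-signal-sketch") as flow:
    maybe_skip_work(already_done=True)

# flow.run()  # the task finishes in a Success state without returning a result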