Example #1
def _param(key, type_, value):
    """Build the appropriate BigQuery query parameter for a key/type/value triple."""
    if type_ not in ('ARRAY', 'STRUCT'):
        param = bigquery.ScalarQueryParameter(key, type_, value)
    elif type_ == 'ARRAY':
        # All array elements must share a single element type.
        types = {_type(v) for v in value}
        if len(types) > 1:
            raise Exception(f'array has too many types: {types}')
        param = bigquery.ArrayQueryParameter(key, types.pop(), value)
    elif type_ == 'STRUCT':
        # Recurse so nested structs and arrays are handled too.
        args = [key]
        for k, v in value.items():
            args.append(_param(k, _type(v), v))
        param = bigquery.StructQueryParameter(*args)
    return param
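The snippet relies on a `_type` helper that is not shown. A minimal sketch of what it might look like, assuming a direct mapping from Python types to BigQuery standard SQL type names (the original helper may differ):

def _type(value):
    # Hypothetical helper assumed by _param above; maps a Python value to a
    # BigQuery standard SQL type name. bool must be checked before int,
    # since bool is a subclass of int.
    if isinstance(value, bool):
        return 'BOOL'
    if isinstance(value, int):
        return 'INT64'
    if isinstance(value, float):
        return 'FLOAT64'
    if isinstance(value, str):
        return 'STRING'
    if isinstance(value, dict):
        return 'STRUCT'
    if isinstance(value, list):
        return 'ARRAY'
    raise TypeError(f'unsupported parameter value: {value!r}')

# Usage: build a STRUCT parameter from a plain dict.
param = _param('struct_value', 'STRUCT', {'x': 1, 'y': 'foo'})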
Example #2
def query_struct_params(x, y):
    # Note: this sample targets the legacy google-cloud-bigquery client
    # (pre-0.28); run_async_query no longer exists in current releases.
    # Example #3 below shows the modern client.query() equivalent.
    client = bigquery.Client()
    query = 'SELECT @struct_value AS s;'
    query_job = client.run_async_query(
        str(uuid.uuid4()),
        query,
        query_parameters=[
            bigquery.StructQueryParameter(
                'struct_value',
                bigquery.ScalarQueryParameter('x', 'INT64', x),
                bigquery.ScalarQueryParameter('y', 'STRING', y))])
    query_job.use_legacy_sql = False

    # Start the query and wait for the job to complete.
    query_job.begin()
    wait_for_job(query_job)  # helper from the original sample, not shown here
    print_results(query_job.results())  # helper from the original sample
Example #3
def query_struct_params(x, y):
    client = bigquery.Client()
    query = 'SELECT @struct_value AS s;'
    query_params = [
        bigquery.StructQueryParameter(
            'struct_value', bigquery.ScalarQueryParameter('x', 'INT64', x),
            bigquery.ScalarQueryParameter('y', 'STRING', y))
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(query, job_config=job_config)

    query_job.result()  # Waits for job to complete

    # Print the results.
    destination_table_ref = query_job.destination
    table = client.get_table(destination_table_ref)
    for row in client.list_rows(table):
        print(row)
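If the destination table is not needed, the same rows can be read straight from the finished job. A minimal sketch of the equivalent result handling:

rows = query_job.result()  # RowIterator over the query output.
for row in rows:
    print(row.s)  # STRUCT values come back as Python dicts.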
Example #4
def client_query_w_struct_params():

    # [START bigquery_query_params_structs]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = "SELECT @struct_value AS s;"
    job_config = bigquery.QueryJobConfig(query_parameters=[
        bigquery.StructQueryParameter(
            "struct_value",
            bigquery.ScalarQueryParameter("x", "INT64", 1),
            bigquery.ScalarQueryParameter("y", "STRING", "foo"),
        )
    ])
    query_job = client.query(query,
                             job_config=job_config)  # Make an API request.

    for row in query_job:
        print(row.s)
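A short usage note: calling the sample should print the struct as a Python dict, since the client maps STRUCT values to dicts:

client_query_w_struct_params()
# Expected output:
# {'x': 1, 'y': 'foo'}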
Example #5
def query_struct_params(x, y):
    # Note: like Example #2, this targets the legacy (pre-0.28) client;
    # run_async_query, begin(), reload(), and fetch_data() were removed
    # in later releases.
    client = bigquery.Client()
    query = 'SELECT @struct_value AS s;'
    query_job = client.run_async_query(
        str(uuid.uuid4()),
        query,
        query_parameters=[
            bigquery.StructQueryParameter(
                'struct_value', bigquery.ScalarQueryParameter('x', 'INT64', x),
                bigquery.ScalarQueryParameter('y', 'STRING', y))
        ])
    query_job.use_legacy_sql = False

    query_job.begin()
    query_job.result()  # Wait for job to complete

    # Print the results.
    destination_table = query_job.destination
    destination_table.reload()
    for row in destination_table.fetch_data():
        print(row)
Example #6
def bq_param_struct(param, value):
    field_params = [bigquery_param(param[k], v) for k, v in value.items()]
    result = bq.StructQueryParameter(param.get_name(), *field_params)
    return result
Example #7
File: client.py Project: shshe/ibis
def bq_param_struct(param, value):
    field_params = [bigquery_param(param[k], v) for k, v in value.items()]
    return bq.StructQueryParameter(param._name, *field_params)
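Examples #6 and #7 both assume a `bigquery_param` dispatcher that routes each parameter/value pair to the matching builder. A rough sketch under that assumption; the real ibis implementation dispatches on ibis datatypes and differs in detail:

from google.cloud import bigquery as bq

def bigquery_param(param, value):
    # Hypothetical dispatcher; shown only to make the struct snippets
    # self-contained.
    if isinstance(value, dict):
        return bq_param_struct(param, value)
    if isinstance(value, list):
        # Assumes a homogeneous STRING array for illustration.
        return bq.ArrayQueryParameter(param.get_name(), 'STRING', value)
    scalar_types = {bool: 'BOOL', int: 'INT64', float: 'FLOAT64', str: 'STRING'}
    return bq.ScalarQueryParameter(param.get_name(), scalar_types[type(value)], value)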
Example #8
    def execute(self, data):

        # setup 
        client = self.client
        bigquery = self.bigquery
        bigquery_datatransfer = self.bigquery_datatransfer
        datetime = self.datetime 
        pytz = self.pytz        
        time = self.time 
        name = data.get("titleName")
        emails = data.get("emails")
        query = data.get("query")
        table = ""
        #

        # create a dataset first if needed
        dataset_main = self.make_dataset()
        table_id = "{}.{}".format(dataset_main, name) 
        #    

        # create a table if needed
        table = self.make_table(table_id)
        #
                

        # an interactive query
        if(self.env.get("interactive")):
            try:
                 
                """
                    query =
                    SELECT name, SUM(number) as total_people
                    FROM `bigquery-public-data.usa_names.usa_1910_2013`
                    WHERE state = 'TX'
                    GROUP BY name, state
                    ORDER BY total_people DESC
                    LIMIT 20
                """
                query_job = client.query(query)  # Make an API request.

                
                return json.dumps({
                    "schema":[{"field":x} for x in ["Name","Count"]],
                    "data":[
                        # Row values can be accessed by field name or index.
                        {
                            "Name":row[0],
                            "Count":row["total_people"] 
                        }
                        for row in query_job
                    ]
                })
                    
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # a batch query
        elif(self.env.get("batch")):
            try:
                job_config = bigquery.QueryJobConfig(
                    # Run at batch priority, which won't count toward concurrent rate limit.
                    priority=bigquery.QueryPriority.BATCH
                )
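                # Note: BATCH-priority jobs queue until idle resources are
                # available; if a job is not started within 24 hours, BigQuery
                # changes its priority to INTERACTIVE.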

                """
                    query =
                    SELECT corpus
                    FROM `bigquery-public-data.samples.shakespeare`
                    GROUP BY corpus;
                """

                # Start the query, passing in the extra configuration.
                query_job = client.query(query, job_config=job_config)  # Make an API request.

                query_job = client.get_job(
                    query_job.job_id, location=query_job.location
                )  # Make an API request.

                return "Job {} is currently in state {}".format(query_job.job_id, query_job.state)             
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        # 

        # a dry run
        elif (self.env.get("dry_run")):
            try:
                """ query
                    
                    SELECT name, COUNT(*) as name_count 
                    FROM `bigquery-public-data.usa_names.usa_1910_2013` 
                    WHERE state = 'WA' 
                    GROUP BY name
                """                
                job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
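                # A dry run validates the query and reports
                # total_bytes_processed without executing it or incurring cost.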

                # Start the query, passing in the extra configuration.
                query_job = client.query(
                    query,
                    job_config=job_config,
                )  # Make an API request.

                # A dry run query completes immediately.
                return "This query will process {} megabytes.".format(query_job.total_bytes_processed/1000000)                
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # write to a  permanent table
        elif(self.env.get("permanent_table")):
            try:
                 
                """ query
                SELECT corpus
                FROM `bigquery-public-data.samples.shakespeare`
                GROUP BY corpus;
                """                
                job_config = bigquery.QueryJobConfig(destination=table_id)
                # Start the query, passing in the extra configuration.
                query_job = client.query(query, job_config=job_config)  # Make an API request.

                # for queries that return massive results using legacy SQL
                # job_config = bigquery.QueryJobConfig(
                #     allow_large_results=True, destination=table_id, use_legacy_sql=True
                # )
                
                query_job.result()  # Wait for the job to complete.

                return "Query results loaded to the table {}".format(table_id)           
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # query caching
        elif (self.env.get("cache_enable/disable")):
            try:
                """ query

                    SELECT corpus
                    FROM `bigquery-public-data.samples.shakespeare`
                    GROUP BY corpus;
                """                
                job_config = bigquery.QueryJobConfig(
                    use_query_cache=False  # to disable the cache
                    # To require cached results instead, set
                    # create_disposition="CREATE_NEVER"; the query then fails
                    # unless it can be served from the cache.
                )
                query_job = client.query(query, job_config=job_config)  # Make an API request.
                query_job.result()
                return "was this query cached? {}".format(query_job.cache_hit)
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # regular parameterized 
        elif(self.env.get("parameterized")):
            try:
                """ query = 
                 
                     SELECT word, word_count
                     FROM `bigquery-public-data.samples.shakespeare`
                     WHERE corpus = @corpus
                     AND word_count >= @min_word_count
                     ORDER BY word_count DESC;
                 """
                job_config = bigquery.QueryJobConfig(
                    query_parameters=[
                        bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
                        bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
                    ]
                )
                query_job = client.query(query, job_config=job_config)  # Make an API request.

                return json.dumps({
                    "schema":[{"field":x} for x in ["word","word_count"]],
                    "data":[
                        # Row values can be accessed by field name or index.
                        {
                            "word":row["word"],
                            "word_count":row["word_count"] 
                        }
                        for row in query_job
                    ]
                })


            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # array parameterized
        elif(self.env.get("array_parameterized")):
            try:
                schema = ["name","count"]
                """query = 
                    
                    SELECT name, sum(number) as count
                    FROM `bigquery-public-data.usa_names.usa_1910_2013`
                    WHERE gender = @gender
                    AND state IN UNNEST(@states)
                    GROUP BY name
                    ORDER BY count DESC
                    LIMIT 10;
                 """
                job_config = bigquery.QueryJobConfig(
                    query_parameters=[
                        bigquery.ScalarQueryParameter("gender", "STRING", "M"),
                        bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
                    ]
                )
                query_job = client.query(query, job_config=job_config)  # Make an API request.

                return json.dumps({
                    "schema":[{"field":x} for x in schema],
                    "data":[
                        # Row values can be accessed by field name or index.
                        {
                            schema[0]:row[schema[0]],
                            schema[1]:row[schema[1]] 
                        }
                        for row in query_job
                    ]
                })
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # timestamp parameters
        elif(self.env.get("timestamp_parameterized")):
            try:
                schema = ["date","f0_"]
                """query = 
                    
                    SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);
                 """
                job_config = bigquery.QueryJobConfig(
                    query_parameters=[
                        bigquery.ScalarQueryParameter(
                            "ts_value",
                            "TIMESTAMP",
                            datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
                        )
                    ]
                )
                query_job = client.query(query, job_config=job_config)  # Make an API request.    
                return json.dumps(
                    {
                        "schema":[{"field":x} for x in schema],
                        "data":[
                            # Row values can be accessed by field name or index.
                            {
                                "date":row[0].strftime("%m/%d/%Y, %H:%M:%S"),
                                schema[1]:row[schema[1]] 
                            }
                            for row in query_job
                        ]
                    },
                    default=str
                )
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # struct parameters
        elif(self.env.get("struct_parameterized")):
            
            try:
                schema = ["s"]
                """ query = 
                SELECT @struct_value AS s;
                """
                job_config = bigquery.QueryJobConfig(
                    query_parameters=[
                        bigquery.StructQueryParameter(
                            "struct_value",
                            bigquery.ScalarQueryParameter("x", "INT64", 1),
                            bigquery.ScalarQueryParameter("y", "STRING", "foo"),
                        )
                    ]
                )
                query_job = client.query(query, job_config=job_config)  # Make an API request.
                return json.dumps({
                    "schema":[{"field":x} for x in schema],
                    "data":[
                        # Row values can be accessed by field name or index.
                        {
                            schema[0]:str(row[schema[0]]),
                        }
                        for row in query_job
                    ]
                })           
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # wildcard
        elif (self.env.get("mutliple_by_wildcard")):
            try:
                job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
                # replace the asterisk with a number or change the table suffixes to see how the amount of data scanned changes
                """ query
                
                SELECT
                max,
                ROUND((max-32)*5/9,1) celsius,
                mo,
                da,
                year
                FROM
                `bigquery-public-data.noaa_gsod.gsod194*` 
                WHERE
                max != 9999.9 # code for missing data
                AND ( _TABLE_SUFFIX = '0'
                    OR _TABLE_SUFFIX = '4' )
                ORDER BY
                max DESC 
                """
                # Start the query, passing in the extra configuration.
                query_job = client.query(
                    query,
                    job_config=job_config,
                )  # Make an API request.

                # A dry run query completes immediately.
                return "This query will process {} megabytes.".format(query_job.total_bytes_processed/1000000)                
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        # 
                
        # scheduled query
            # created through the BigQuery Data Transfer Service client
        elif(self.env.get("scheduled")):
            try:
                # Use standard SQL syntax for the query.
                """ query = 
                
                SELECT
                CURRENT_TIMESTAMP() as current_time,
                @run_time as intended_run_time,
                @run_date as intended_run_date,
                17 as some_integer
                """

                transfer_client = bigquery_datatransfer.DataTransferServiceClient()
                
                project_id = client.project
                dataset_id = self.dataset_names[0]
                # This service account will be used to execute the scheduled
                # queries. Omit this request parameter to run the query as the
                # user with the credentials associated with this client;
                # remember to grant the required roles and permissions.
                # service_account_name = "*****@*****.**"


                parent = transfer_client.common_project_path(project_id)

                transfer_config = bigquery_datatransfer.TransferConfig(
                    destination_dataset_id=dataset_id,
                    display_name="Your Scheduled Query Name",
                    data_source_id="scheduled_query",
                    params={
                        "query": query,
                        "destination_table_name_template": "your_table_{run_date}",
                        "write_disposition": "WRITE_APPEND",
                        "partitioning_field": "",
                    },
                    schedule="every 24 hours",
                )
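                # "schedule" uses the Data Transfer Service schedule syntax,
                # e.g. "every 24 hours" or "every mon 09:00".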

                transfer_config = transfer_client.create_transfer_config(
                    bigquery_datatransfer.CreateTransferConfigRequest(
                        parent=parent,
                        transfer_config=transfer_config,
                        # service_account_name=service_account_name,
                    )
                )

                return "Created scheduled query '{}'".format(transfer_config.name)       
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #        

        # check schedule status
            # they appear under Scheduled Queries in the console
        elif(self.env.get("scheduled_status")):
            try:
                transfer_client = bigquery_datatransfer.DataTransferServiceClient()
                project_id = client.project
                parent = transfer_client.common_project_path(project_id)

                configs = transfer_client.list_transfer_configs(parent=parent)
                result = "Got the following configs:"
                for config in configs:
                    result += f"\n\tID: {config.name}, Schedule: {config.schedule}"      
                return result   
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # manual runs
            # used to immediately trigger (backfill) runs of a scheduled query
        elif (self.env.get("manual_run")):
            try:
                transfer_client = bigquery_datatransfer.DataTransferServiceClient()
                transfer_config_name = query  # here the "query" input carries the transfer config resource name
                now = datetime.datetime.now(datetime.timezone.utc)
                start_time = now - datetime.timedelta(days=5)
                end_time = now - datetime.timedelta(days=2)

                # Some data sources, such as scheduled_query only support daily run.
                # Truncate start_time and end_time to midnight time (00:00AM UTC).
                start_time = datetime.datetime(
                    start_time.year, start_time.month, start_time.day, tzinfo=datetime.timezone.utc
                )
                end_time = datetime.datetime(
                    end_time.year, end_time.month, end_time.day, tzinfo=datetime.timezone.utc
                )
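                # schedule_transfer_runs creates one backfill run per day in
                # the [start_time, end_time] range for a daily schedule.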

                response = transfer_client.schedule_transfer_runs(
                    parent=transfer_config_name,
                    start_time=start_time,
                    end_time=end_time,
                )

                result = "Started transfer runs:"
                for run in response.runs:
                    result += f"\n\tbackfill: {run.run_time} run: {run.name}"    
                return result        
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        # 

        # delete scheduled query
        elif(self.env.get("scheduled_delete")):
            try:
                import google.api_core.exceptions
                transfer_client = bigquery_datatransfer.DataTransferServiceClient()
                transfer_config_name = query  # here the "query" input carries the transfer config resource name
                transfer_client.delete_transfer_config(name=transfer_config_name)
                return f"Deleted transfer config: {transfer_config_name}"
            except google.api_core.exceptions.NotFound:
                return "Transfer config not found."                            
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        return "Check the backend env dictionary you did set it so the backend didnt do anything"