class BigqueryRunQuery(bigquery.BigQueryRunQueryTask):
    """Runs a query on BigQuery and saves the result to a table.

    Parameters
    ----------
    client: `luigi.contrib.bigquery.BigQueryClient()` instance, optional
        Default is a new instance.
    project: str, optional
        E.g. "my-project-id" (default is PROJECT_ID).
    dataset: str
        Dataset to write to, e.g. "my_dataset_id".
    table: str
        Table to write to, e.g. "my_table_id".
    query: str
        Query to run, e.g. "SELECT 'foo' AS bar".

    References
    ----------
    https://luigi.readthedocs.io/en/stable/api/luigi.contrib.bigquery.html
    """

    client = luigi.Parameter(default=bigquery.BigQueryClient())
    project = luigi.Parameter(default=PROJECT_ID)
    dataset = luigi.Parameter()
    table = luigi.Parameter()
    query = luigi.Parameter(default="""SELECT 'foo' AS bar""")

    def output(self):
        return bigquery.BigQueryTarget(self.project, self.dataset, self.table,
                                       client=self.client)
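A minimal sketch of how the task above might be triggered; the dataset and table names below are placeholders, not values taken from the snippet, and the imports mirror what the snippet already assumes.

# Hypothetical invocation of BigqueryRunQuery with the local scheduler;
# 'my_dataset_id' and 'my_table_id' are placeholder names.
import luigi

luigi.build(
    [BigqueryRunQuery(dataset='my_dataset_id',
                      table='my_table_id',
                      query="SELECT 'foo' AS bar")],
    local_scheduler=True)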
def setUp(self):
    super(BigQueryGcloudTest, self).setUp()
    self.bq_client = bigquery.BigQueryClient(gcs_test.CREDENTIALS)
    # Use the test method name as the table id so each test gets its own table,
    # and make sure the table is deleted once the test finishes.
    self.table = bigquery.BQTable(project_id=PROJECT_ID,
                                  dataset_id=DATASET_ID,
                                  table_id=self.id().split('.')[-1])
    self.addCleanup(self.bq_client.delete_table, self.table)
def setUp(self):
    self.gcs_client = gcs.GCSClient(CREDENTIALS)
    self.bq_client = bigquery.BigQueryClient(CREDENTIALS)
    self.table_id = "avro_bq_table"
    self.gcs_dir_url = 'gs://' + BUCKET_NAME + "/foo"
    self.addCleanup(self.gcs_client.remove, self.gcs_dir_url)
    self.addCleanup(self.bq_client.delete_dataset,
                    bigquery.BQDataset(PROJECT_ID, DATASET_ID, EU_LOCATION))
    self._produce_test_input()
def setUp(self):
    self.bq_client = bigquery.BigQueryClient(CREDENTIALS)
    self.gcs_client = gcs.GCSClient(CREDENTIALS)

    # Set up GCS input data
    try:
        self.gcs_client.client.buckets().insert(
            project=PROJECT_ID,
            body={'name': BUCKET_NAME, 'location': EU_LOCATION}).execute()
    except googleapiclient.errors.HttpError as ex:
        # TODO: verify that existing dataset is not US
        if ex.resp.status != 409:  # bucket already exists
            raise
    self.gcs_client.remove(bucket_url(''), recursive=True)
    self.gcs_client.mkdir(bucket_url(''))

    text = '\n'.join(map(json.dumps, [
        {'field1': 'hi', 'field2': 1},
        {'field1': 'bye', 'field2': 2},
    ]))
    self.gcs_file = bucket_url(self.id())
    self.gcs_client.put_string(text, self.gcs_file)

    # Set up BigQuery datasets
    self.table = bigquery.BQTable(project_id=PROJECT_ID,
                                  dataset_id=DATASET_ID,
                                  table_id=self.id().split('.')[-1],
                                  location=None)
    self.table_eu = bigquery.BQTable(project_id=PROJECT_ID,
                                     dataset_id=EU_DATASET_ID,
                                     table_id=self.id().split('.')[-1] + '_eu',
                                     location=EU_LOCATION)

    self.addCleanup(self.gcs_client.remove, bucket_url(''), recursive=True)
    self.addCleanup(self.bq_client.delete_dataset, self.table.dataset)
    self.addCleanup(self.bq_client.delete_dataset, self.table_eu.dataset)

    # Recreate both datasets from scratch so every test starts clean.
    self.bq_client.delete_dataset(self.table.dataset)
    self.bq_client.delete_dataset(self.table_eu.dataset)
    self.bq_client.make_dataset(self.table.dataset, body={})
    self.bq_client.make_dataset(self.table_eu.dataset, body={})
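A minimal sketch of a test that could build on this fixture, loading the newline-delimited JSON written to GCS into the US-located table via luigi's BigQueryLoadTask; the helper task, its schema, and the test method name are assumptions and not part of the original suite.

# Hypothetical load task and test method exercising the fixture above;
# not part of the original test code.
class LoadJsonFixture(bigquery.BigQueryLoadTask):
    gcs_file = luigi.Parameter()
    dataset_id = luigi.Parameter()
    table_id = luigi.Parameter()

    source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
    # Schema matching the two JSON records written in setUp (assumed).
    schema = [
        {'name': 'field1', 'type': 'STRING'},
        {'name': 'field2', 'type': 'INTEGER'},
    ]

    def source_uris(self):
        return [self.gcs_file]

    def output(self):
        return bigquery.BigQueryTarget(PROJECT_ID, self.dataset_id, self.table_id,
                                       client=bigquery.BigQueryClient(CREDENTIALS))


def test_load_json_into_us_table(self):
    task = LoadJsonFixture(gcs_file=self.gcs_file,
                           dataset_id=self.table.dataset_id,
                           table_id=self.table.table_id)
    task.run()
    self.assertTrue(self.bq_client.table_exists(self.table))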