def test_access_entries_setter_invalid_field(self):
    from google.cloud.bigquery.dataset import AccessEntry

    dataset = self._make_one(self.DS_REF)
    phred = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
    with self.assertRaises(ValueError):
        dataset.access_entries = [phred, object()]
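# The setter contract exercised above, sketched standalone. This is a minimal,
# hedged example of the behavior the test asserts; the project ID, dataset ID,
# and email below are hypothetical placeholders, not values from the source.
from google.cloud.bigquery import Dataset, DatasetReference
from google.cloud.bigquery.dataset import AccessEntry

dataset = Dataset(DatasetReference("my-project", "my_dataset"))
entry = AccessEntry("OWNER", "userByEmail", "owner@example.com")
dataset.access_entries = [entry]  # OK: every item is an AccessEntry
try:
    dataset.access_entries = [entry, object()]  # a plain object() is rejected
except ValueError as exc:
    print(exc)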
def _sink_bigquery_setup(client):
    from google.cloud import bigquery

    dataset_name = "sink_bigquery_%d" % (_millis(),)
    # The incoming client is rebound to a dedicated BigQuery client, which
    # manages the sink's destination dataset.
    client = bigquery.Client()
    dataset = client.create_dataset(dataset_name)

    # [START sink_dataset_permissions]
    from google.cloud.bigquery.dataset import AccessEntry

    entry_list = dataset.access_entries
    entry_list.append(AccessEntry("WRITER", "groupByEmail", "*****@*****.**"))
    dataset.access_entries = entry_list
    client.update_dataset(dataset, ["access_entries"])  # API call
    # [END sink_dataset_permissions]
    return dataset
def _init_bigquery_dataset(self):
    from google.cloud import bigquery
    from google.cloud.bigquery.dataset import AccessEntry

    dataset_name = (
        'system_testing_dataset' + _RESOURCE_ID).replace('-', '_')
    dataset_uri = 'bigquery.googleapis.com/projects/%s/datasets/%s' % (
        Config.CLIENT.project, dataset_name,)

    # Create the destination dataset, and set up the ACL to allow
    # Stackdriver Logging to write into it.
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_name)
    dataset = bigquery_client.create_dataset(bigquery.Dataset(dataset_ref))
    self.to_delete.append((bigquery_client, dataset))
    bigquery_client.get_dataset(dataset)
    access = AccessEntry(
        'WRITER', 'groupByEmail', '*****@*****.**')
    dataset.access_entries.append(access)
    bigquery_client.update_dataset(dataset, ['access_entries'])
    return dataset_uri
def _init_bigquery_dataset(self):
    from google.cloud import bigquery
    from google.cloud.bigquery.dataset import AccessEntry

    dataset_name = ("system_testing_dataset" + _RESOURCE_ID).replace("-", "_")
    dataset_uri = "bigquery.googleapis.com/projects/%s/datasets/%s" % (
        Config.CLIENT.project,
        dataset_name,
    )

    # Create the destination dataset, and set up the ACL to allow
    # Stackdriver Logging to write into it. Creation is retried on
    # transient API errors.
    retry = RetryErrors((TooManyRequests, BadGateway, ServiceUnavailable))
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_name)
    dataset = retry(bigquery_client.create_dataset)(bigquery.Dataset(dataset_ref))
    self.to_delete.append((bigquery_client, dataset))
    bigquery_client.get_dataset(dataset)
    access = AccessEntry("WRITER", "groupByEmail", "*****@*****.**")
    dataset.access_entries.append(access)
    bigquery_client.update_dataset(dataset, ["access_entries"])
    return dataset_uri
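# The two _init_bigquery_dataset variants above lean on helpers that are not
# shown here. A hedged sketch of the imports they likely assume: the exception
# classes are real google.api_core types, and RetryErrors presumably comes
# from the repo's shared test_utils package.
from google.api_core.exceptions import BadGateway, ServiceUnavailable, TooManyRequests
from test_utils.retry import RetryErrors

# RetryErrors wraps a callable and re-invokes it whenever one of the listed
# transient errors is raised, e.g.:
retry = RetryErrors((TooManyRequests, BadGateway, ServiceUnavailable))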
def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
    from google.cloud.bigquery.dataset import AccessEntry

    PATH = "projects/%s/datasets" % PROJECT
    DESCRIPTION = "DESC"
    FRIENDLY_NAME = "FN"
    LOCATION = "US"
    USER_EMAIL = "*****@*****.**"
    LABELS = {"color": "red"}
    VIEW = {
        "projectId": "my-proj",
        "datasetId": "starry-skies",
        "tableId": "northern-hemisphere",
    }
    RESOURCE = {
        "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
        "etag": "etag",
        "id": "%s:%s" % (PROJECT, DS_ID),
        "description": DESCRIPTION,
        "friendlyName": FRIENDLY_NAME,
        "location": LOCATION,
        "defaultTableExpirationMs": "3600",
        "labels": LABELS,
        "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
    }
    conn = client._connection = make_connection(RESOURCE)
    entries = [
        AccessEntry("OWNER", "userByEmail", USER_EMAIL),
        AccessEntry(None, "view", VIEW),
    ]

    ds_ref = DatasetReference(PROJECT, DS_ID)
    before = Dataset(ds_ref)
    before.access_entries = entries
    before.description = DESCRIPTION
    before.friendly_name = FRIENDLY_NAME
    before.default_table_expiration_ms = 3600
    before.location = LOCATION
    before.labels = LABELS
    after = client.create_dataset(before)

    assert after.dataset_id == DS_ID
    assert after.project == PROJECT
    assert after.etag == RESOURCE["etag"]
    assert after.full_dataset_id == RESOURCE["id"]
    assert after.description == DESCRIPTION
    assert after.friendly_name == FRIENDLY_NAME
    assert after.location == LOCATION
    assert after.default_table_expiration_ms == 3600
    assert after.labels == LABELS

    conn.api_request.assert_called_once_with(
        method="POST",
        path="/%s" % PATH,
        data={
            "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
            "description": DESCRIPTION,
            "friendlyName": FRIENDLY_NAME,
            "location": LOCATION,
            "defaultTableExpirationMs": "3600",
            "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
            "labels": LABELS,
        },
        timeout=DEFAULT_TIMEOUT,
    )
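# One API detail worth noting from the entries list above: a view-based access
# entry carries no role (access is granted by authorizing the view itself), so
# the test constructs it as AccessEntry(None, "view", VIEW). A minimal hedged
# sketch of the two entry shapes; the email is a hypothetical placeholder.
from google.cloud.bigquery.dataset import AccessEntry

owner = AccessEntry("OWNER", "userByEmail", "owner@example.com")
authorized_view = AccessEntry(
    None,  # role must be omitted for view entries
    "view",
    {
        "projectId": "my-proj",
        "datasetId": "starry-skies",
        "tableId": "northern-hemisphere",
    },
)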
def to_access_entry(model):
    return AccessEntry(model.role, model.entity_type, model.entity_id)
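# For context, a hedged sketch of how to_access_entry might be fed from stored
# ACL rows. AccessModel and its values are hypothetical, purely illustrative;
# the original source does not show the model type.
from dataclasses import dataclass

@dataclass
class AccessModel:
    role: str
    entity_type: str
    entity_id: str

models = [AccessModel("READER", "groupByEmail", "readers@example.com")]
entries = [to_access_entry(m) for m in models]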
def export_bigquery():
    json_acct_info = json.loads(
        base64.b64decode(os.environ["GOOGLE_SERVICE_ACCOUNT"]))
    credentials = service_account.Credentials.from_service_account_info(
        json_acct_info)
    client = bigquery.Client(credentials=credentials)

    with tempfile.TemporaryDirectory() as tmpdirname, get_engine().begin() as connection:
        # Recreate the target dataset from scratch in the EU region.
        dataset_id = "iati-tables.iati"
        client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "EU"
        dataset = client.create_dataset(dataset, timeout=30)

        # Make the dataset readable by any authenticated user.
        access_entries = list(dataset.access_entries)
        access_entries.append(
            AccessEntry("READER", "specialGroup", "allAuthenticatedUsers"))
        dataset.access_entries = access_entries
        dataset = client.update_dataset(dataset, ["access_entries"])

        # Collect per-table field metadata from the _fields table.
        object_details = defaultdict(list)
        result = list(
            connection.execute(
                "SELECT table_name, field, type, docs FROM _fields "
                "ORDER BY table_name, field_order, field"
            ))
        for row in result:
            object_details[row.table_name].append(
                dict(name=row.field, type=row.type, description=row.docs))

        for object_type, details in object_details.items():
            print(f"loading {object_type}")
            result = connection.execute(
                sa.text(
                    f'SELECT to_jsonb("{object_type.lower()}") AS object '
                    f'FROM "{object_type.lower()}"'
                ))
            # Write each table to a local Avro file, then load it into BigQuery.
            schema = create_avro_schema(object_type, details)
            with open(f"{tmpdirname}/{object_type}.avro", "wb") as out:
                writer(
                    out,
                    parse_schema(schema),
                    generate_avro_records(result, details),
                    validator=True,
                    codec="deflate",
                )
            table_id = f"{dataset_id}.{object_type}"
            job_config = bigquery.LoadJobConfig(
                source_format=bigquery.SourceFormat.AVRO)
            with open(f"{tmpdirname}/{object_type}.avro", "rb") as source_file:
                client.load_table_from_file(
                    source_file, table_id, job_config=job_config,
                    size=None, timeout=5)
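# export_bigquery calls create_avro_schema and generate_avro_records, which are
# defined elsewhere in the source. Hedged sketches of plausible shapes follow;
# these are NOT the project's actual implementations. Types are simplified to
# nullable strings for illustration, whereas the real code presumably maps the
# SQL types recorded in _fields to Avro types.
def create_avro_schema_sketch(object_type, details):
    # One nullable field per documented column, documented with its docs text.
    return {
        "type": "record",
        "name": object_type,
        "fields": [
            {
                "name": d["name"],
                "type": ["null", "string"],
                "doc": d["description"] or "",
            }
            for d in details
        ],
    }

def generate_avro_records_sketch(result, details):
    # Yield one dict per row, keeping only the documented fields so each
    # record matches the generated schema.
    names = [d["name"] for d in details]
    for row in result:
        yield {name: row.object.get(name) for name in names}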