Example #1
    def test_access_entries_setter_invalid_field(self):
        from google.cloud.bigquery.dataset import AccessEntry

        dataset = self._make_one(self.DS_REF)
        phred = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        with self.assertRaises(ValueError):
            dataset.access_entries = [phred, object()]
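For contrast, a minimal sketch of the assignment this setter does accept; the project, dataset, and entity values are illustrative and not from the original test:

from google.cloud.bigquery import Dataset, DatasetReference
from google.cloud.bigquery.dataset import AccessEntry

dataset = Dataset(DatasetReference("my-project", "my_dataset"))  # illustrative ids
dataset.access_entries = [
    AccessEntry("OWNER", "userByEmail", "owner@example.com"),
    AccessEntry("READER", "specialGroup", "projectReaders"),
]  # accepted: every element is an AccessEntry instance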
Example #2
def _sink_bigquery_setup(client):
    from google.cloud import bigquery

    dataset_name = "sink_bigquery_%d" % (_millis(),)
    client = bigquery.Client()
    dataset = client.create_dataset(dataset_name)

    # [START sink_dataset_permissions]
    from google.cloud.bigquery.dataset import AccessEntry

    entry_list = dataset.access_entries
    entry_list.append(AccessEntry("WRITER", "groupByEmail", "*****@*****.**"))
    dataset.access_entries = entry_list
    client.update_dataset(dataset, ["access_entries"])  # API call
    # [END sink_dataset_permissions]

    return dataset
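A hedged follow-up, not part of the original snippet, that re-fetches the dataset created above (reusing the client and dataset objects from this example) to confirm the appended WRITER group is now in the ACL:

updated = client.get_dataset(dataset.reference)  # API call
writer_groups = [
    entry.entity_id
    for entry in updated.access_entries
    if entry.role == "WRITER" and entry.entity_type == "groupByEmail"
]
assert writer_groups  # the group appended above should be present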
Example #3
    def _init_bigquery_dataset(self):
        from google.cloud import bigquery
        from google.cloud.bigquery.dataset import AccessEntry
        dataset_name = (
            'system_testing_dataset' + _RESOURCE_ID).replace('-', '_')
        dataset_uri = 'bigquery.googleapis.com/projects/%s/datasets/%s' % (
            Config.CLIENT.project, dataset_name,)

        # Create the destination dataset, and set up the ACL to allow
        # Stackdriver Logging to write into it.
        bigquery_client = bigquery.Client()
        dataset_ref = bigquery_client.dataset(dataset_name)
        dataset = bigquery_client.create_dataset(bigquery.Dataset(dataset_ref))
        self.to_delete.append((bigquery_client, dataset))
        bigquery_client.get_dataset(dataset)
        access = AccessEntry(
            'WRITER', 'groupByEmail', '*****@*****.**')
        dataset.access_entries.append(access)
        bigquery_client.update_dataset(dataset, ['access_entries'])
        return dataset_uri
Example #4
    def _init_bigquery_dataset(self):
        from google.cloud import bigquery
        from google.cloud.bigquery.dataset import AccessEntry

        dataset_name = ("system_testing_dataset" + _RESOURCE_ID).replace("-", "_")
        dataset_uri = "bigquery.googleapis.com/projects/%s/datasets/%s" % (
            Config.CLIENT.project,
            dataset_name,
        )

        # Create the destination dataset, and set up the ACL to allow
        # Stackdriver Logging to write into it.
        retry = RetryErrors((TooManyRequests, BadGateway, ServiceUnavailable))
        bigquery_client = bigquery.Client()
        dataset_ref = bigquery_client.dataset(dataset_name)
        dataset = retry(bigquery_client.create_dataset)(bigquery.Dataset(dataset_ref))
        self.to_delete.append((bigquery_client, dataset))
        bigquery_client.get_dataset(dataset)
        access = AccessEntry("WRITER", "groupByEmail", "*****@*****.**")
        dataset.access_entries.append(access)
        bigquery_client.update_dataset(dataset, ["access_entries"])
        return dataset_uri
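RetryErrors above is a test-only helper; a hedged sketch of the same idea using the public google.api_core retry API (an assumption, not what this test uses), reusing bigquery_client and dataset_ref from the example:

from google.api_core.exceptions import BadGateway, ServiceUnavailable, TooManyRequests
from google.api_core.retry import Retry, if_exception_type

# Retry dataset creation only on transient server-side errors.
retry = Retry(predicate=if_exception_type(TooManyRequests, BadGateway, ServiceUnavailable))
dataset = retry(bigquery_client.create_dataset)(bigquery.Dataset(dataset_ref))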
Example #5
def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
    from google.cloud.bigquery.dataset import AccessEntry

    PATH = "projects/%s/datasets" % PROJECT
    DESCRIPTION = "DESC"
    FRIENDLY_NAME = "FN"
    LOCATION = "US"
    USER_EMAIL = "*****@*****.**"
    LABELS = {"color": "red"}
    VIEW = {
        "projectId": "my-proj",
        "datasetId": "starry-skies",
        "tableId": "northern-hemisphere",
    }
    RESOURCE = {
        "datasetReference": {
            "projectId": PROJECT,
            "datasetId": DS_ID
        },
        "etag": "etag",
        "id": "%s:%s" % (PROJECT, DS_ID),
        "description": DESCRIPTION,
        "friendlyName": FRIENDLY_NAME,
        "location": LOCATION,
        "defaultTableExpirationMs": "3600",
        "labels": LABELS,
        "access": [{
            "role": "OWNER",
            "userByEmail": USER_EMAIL
        }, {
            "view": VIEW
        }],
    }
    conn = client._connection = make_connection(RESOURCE)
    entries = [
        AccessEntry("OWNER", "userByEmail", USER_EMAIL),
        AccessEntry(None, "view", VIEW),
    ]

    ds_ref = DatasetReference(PROJECT, DS_ID)
    before = Dataset(ds_ref)
    before.access_entries = entries
    before.description = DESCRIPTION
    before.friendly_name = FRIENDLY_NAME
    before.default_table_expiration_ms = 3600
    before.location = LOCATION
    before.labels = LABELS
    after = client.create_dataset(before)

    assert after.dataset_id == DS_ID
    assert after.project == PROJECT
    assert after.etag == RESOURCE["etag"]
    assert after.full_dataset_id == RESOURCE["id"]
    assert after.description == DESCRIPTION
    assert after.friendly_name == FRIENDLY_NAME
    assert after.location == LOCATION
    assert after.default_table_expiration_ms == 3600
    assert after.labels == LABELS

    conn.api_request.assert_called_once_with(
        method="POST",
        path="/%s" % PATH,
        data={
            "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
            "description": DESCRIPTION,
            "friendlyName": FRIENDLY_NAME,
            "location": LOCATION,
            "defaultTableExpirationMs": "3600",
            "access": [
                {"role": "OWNER", "userByEmail": USER_EMAIL},
                {"view": VIEW},
            ],
            "labels": LABELS,
        },
        timeout=DEFAULT_TIMEOUT,
    )
Example #6
def to_access_entry(model):
    from google.cloud.bigquery.dataset import AccessEntry

    return AccessEntry(model.role, model.entity_type, model.entity_id)
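A hypothetical usage sketch; the AccessModel dataclass is illustrative only and simply mirrors the role, entity_type, and entity_id attributes the converter reads:

from dataclasses import dataclass


@dataclass
class AccessModel:  # hypothetical stand-in for the source's model object
    role: str
    entity_type: str
    entity_id: str


entry = to_access_entry(AccessModel("READER", "userByEmail", "analyst@example.com"))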
Example #7
def export_bigquery():

    json_acct_info = json.loads(
        base64.b64decode(os.environ["GOOGLE_SERVICE_ACCOUNT"]))

    credentials = service_account.Credentials.from_service_account_info(
        json_acct_info)

    client = bigquery.Client(credentials=credentials)

    with tempfile.TemporaryDirectory() as tmpdirname, get_engine().begin() as connection:
        dataset_id = "iati-tables.iati"
        client.delete_dataset(dataset_id,
                              delete_contents=True,
                              not_found_ok=True)
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "EU"

        dataset = client.create_dataset(dataset, timeout=30)

        access_entries = list(dataset.access_entries)
        access_entries.append(
            AccessEntry("READER", "specialGroup", "allAuthenticatedUsers"))
        dataset.access_entries = access_entries

        dataset = client.update_dataset(dataset, ["access_entries"])

        object_details = defaultdict(list)
        result = list(
            connection.execute(
                "SELECT table_name, field, type, docs FROM _fields order by table_name, field_order, field"
            ))

        for row in result:
            object_details[row.table_name].append(
                dict(name=row.field, type=row.type, description=row.docs))

        for object_type, object_fields in object_details.items():
            print(f"loading {object_type}")
            result = connection.execute(
                sa.text(
                    f'SELECT to_jsonb("{object_type.lower()}") AS object FROM "{object_type.lower()}"'
                ))
            schema = create_avro_schema(object_type, object_fields)

            with open(f"{tmpdirname}/{object_type}.avro", "wb") as out:
                writer(
                    out,
                    parse_schema(schema),
                    generate_avro_records(result, object_fields),
                    validator=True,
                    codec="deflate",
                )

            table_id = f"{dataset_id}.{object_type}"

            job_config = bigquery.LoadJobConfig(
                source_format=bigquery.SourceFormat.AVRO)

            with open(f"{tmpdirname}/{object_type}.avro", "rb") as source_file:
                client.load_table_from_file(source_file,
                                            table_id,
                                            job_config=job_config,
                                            size=None,
                                            timeout=5)
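load_table_from_file returns a LoadJob without waiting for it to finish; a hedged sketch, not in the original, of blocking on the load and checking the row count, reusing the client, source_file, table_id, and job_config names from the loop above:

job = client.load_table_from_file(source_file, table_id, job_config=job_config)
job.result()  # wait for the load to complete; raises if the job failed
table = client.get_table(table_id)
print(f"loaded {table.num_rows} rows into {table_id}")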