示例#1
0
文件: gbq.py 项目: wolfws/pandas-gbq
    def __init__(
        self,
        project_id,
        reauth=False,
        private_key=None,
        auth_local_webserver=False,
        dialect="legacy",
        location=None,
        try_credentials=None,
        credentials=None,
    ):
        """Resolve credentials and a project ID, then build the client.

        Credentials are looked up in this order: the ``credentials``
        argument, the module-level ``context`` cache, and finally
        ``auth.get_credentials``. A truthy ``private_key`` or ``reauth``
        always triggers the ``auth.get_credentials`` call, replacing any
        credentials that were passed in or found in the cache.

        Args:
            project_id: Project to use. When ``None``, the project that came
                with the resolved credentials is used instead.
            reauth: Forwarded to ``auth.get_credentials``; a truthy value
                forces that call.
            private_key: Forwarded to ``auth.get_credentials``; a truthy
                value forces that call.
            auth_local_webserver: Forwarded to ``auth.get_credentials``.
            dialect: Stored on the instance as ``self.dialect``.
            location: Stored on the instance as ``self.location``.
            try_credentials: Forwarded to ``auth.get_credentials``.
            credentials: Optional ready-made credentials object.

        Raises:
            ValueError: If no project ID was supplied and none accompanied
                the resolved credentials.
        """
        global context
        from google.api_core.exceptions import ClientError, GoogleAPIError
        from pandas_gbq import auth

        self.http_error = (ClientError, GoogleAPIError)

        # Plain copies of the constructor arguments.
        self.project_id = project_id
        self.location = location
        self.dialect = dialect
        self.reauth = reauth
        self.private_key = private_key
        self.auth_local_webserver = auth_local_webserver
        self.credentials_path = _get_credentials_file()

        resolved_credentials = credentials
        fallback_project = None

        # Nothing usable was passed in: fall back to the shared cache.
        if not resolved_credentials:
            resolved_credentials = context.credentials
            fallback_project = context.project

        # An explicit key or re-auth request bypasses whatever we have so
        # far; an empty result from the steps above also lands here.
        if private_key or reauth or not resolved_credentials:
            resolved_credentials, fallback_project = auth.get_credentials(
                private_key=private_key,
                project_id=project_id,
                reauth=reauth,
                auth_local_webserver=auth_local_webserver,
                try_credentials=try_credentials,
            )
        self.credentials = resolved_credentials

        # The caller's project wins; otherwise use the one that came with
        # the credentials, and give up if there is still none.
        if self.project_id is None:
            self.project_id = fallback_project
            if self.project_id is None:
                raise ValueError(
                    "Could not determine project ID and one was not supplied.")

        # Populate the cache only for the slots that are still empty.
        if context.credentials is None:
            context.credentials = self.credentials
        if context.project is None:
            context.project = self.project_id

        self.client = self.get_client()

        # The original note cites 5 USD per TB, with the first 1 TB per month
        # free (https://cloud.google.com/bigquery/pricing). The stored value
        # is 5 USD divided by 2**40, i.e. the price of a single byte.
        self.query_price_for_TB = 5.0 / 2**40
示例#2
0
def setup_bigquery_and_config():
    """Yield a factory that creates throwaway BigQuery datasets and configs.

    The project ID is read from the ``GOOGLE_PROJECT_ID`` environment
    variable; when that is unset or empty it is taken from the second value
    returned by ``get_credentials()``. One ``bigquery.Client`` is created for
    that project and shared by every call to the yielded factory.

    After the consumer resumes the generator, every config file written by
    the factory is removed and every dataset it created is deleted together
    with its contents.

    Yields:
        The ``setup`` callable described below.
    """
    project_id = os.environ.get("GOOGLE_PROJECT_ID")
    if not project_id:
        _, project_id = get_credentials()
    bigquery_client = bigquery.Client(project=project_id)
    datasets = []
    config_files = []

    def setup(
        validate_records=False, stream_data=False, replication_method="HYBRID",
    ):
        """Create one randomly named dataset plus its target config file.

        The three keyword arguments are written verbatim into the JSON
        config, alongside ``disable_collection: True``.

        Returns:
            Tuple ``(project_id, bigquery_client, config_filename,
            dataset_id)``.
        """
        random_suffix = "".join(choice(ascii_uppercase) for _ in range(12))
        dataset_id = "target_bigquery_test_" + random_suffix
        target_config = {
            "project_id": project_id,
            "dataset_id": dataset_id,
            "validate_records": validate_records,
            "stream_data": stream_data,
            "replication_method": replication_method,
            "disable_collection": True,
        }
        config_filename = f"target-config-{dataset_id}.json"
        # Track the file before writing so teardown removes it even if a
        # later step of this call fails.
        config_files.append(config_filename)
        with open(config_filename, "w") as f:
            f.write(json.dumps(target_config))

        datasets.append(bigquery_client.create_dataset(dataset_id))
        return project_id, bigquery_client, config_filename, dataset_id

    yield setup

    # Teardown. Dataset deletion sits in ``finally`` so that a failing
    # ``os.remove`` (for example, a file the consumer already deleted) can no
    # longer skip it and leave the datasets behind in the project.
    try:
        for config_file in config_files:
            os.remove(config_file)
    finally:
        for dataset in datasets:
            bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=False)
示例#3
0
    def __init__(self, project_id, reauth=False,
                 private_key=None, auth_local_webserver=False,
                 dialect='legacy', location=None):
        """Fetch credentials, settle on a project ID and build the client.

        ``auth.get_credentials`` is always called; it returns the credentials
        together with a project that is used only when ``project_id`` is
        ``None``.

        Args:
            project_id: Project to use, or ``None`` to take the one returned
                alongside the credentials.
            reauth: Forwarded to ``auth.get_credentials``.
            private_key: Forwarded to ``auth.get_credentials``.
            auth_local_webserver: Forwarded to ``auth.get_credentials``.
            dialect: Stored on the instance as ``self.dialect``.
            location: Stored on the instance as ``self.location``.

        Raises:
            ValueError: If neither the caller nor ``auth.get_credentials``
                provided a project ID.
        """
        from google.api_core.exceptions import ClientError, GoogleAPIError
        from pandas_gbq import auth

        self.http_error = (ClientError, GoogleAPIError)

        # Plain copies of the constructor arguments.
        self.location = location
        self.dialect = dialect
        self.reauth = reauth
        self.private_key = private_key
        self.auth_local_webserver = auth_local_webserver
        self.credentials_path = _get_credentials_file()

        fetched_credentials, fetched_project = auth.get_credentials(
            private_key=private_key,
            project_id=project_id,
            reauth=reauth,
            auth_local_webserver=auth_local_webserver,
        )
        self.credentials = fetched_credentials

        # The caller's project wins; otherwise use the one that came back
        # with the credentials.
        self.project_id = (
            fetched_project if project_id is None else project_id
        )
        if self.project_id is None:
            raise ValueError(
                'Could not determine project ID and one was not supplied.')

        self.client = self.get_client()

        # The original note cites 5 USD per TB, with the first 1 TB per month
        # free (https://cloud.google.com/bigquery/pricing). The stored value
        # is 5 USD divided by 2**40, i.e. the price of a single byte.
        self.query_price_for_TB = 5. / 2**40
def main(buf=sys.stdin.buffer):
    """Run the target: read config, consume input lines, emit final state.

    The starting config holds only the ``project_id`` returned by
    ``get_credentials()``; keys from the JSON file given with
    ``-c/--config`` are merged on top of it. Unless the merged config sets
    ``disable_collection`` to a truthy value, ``send_usage_stats`` is
    started on a separate thread.

    Args:
        buf: Binary stream to read from; it is wrapped in a UTF-8 text
            reader. The default is ``sys.stdin.buffer`` as it was when this
            function was defined.

    Returns:
        The state returned by ``persist_lines``, after passing it to
        ``emit_state``.
    """
    _, project_id = get_credentials()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-c", "--config", help="Config file")
    cli_args = arg_parser.parse_args()

    config = {"project_id": project_id}
    config_path = cli_args.config
    if config_path:
        with open(config_path) as config_file:
            config.update(json.load(config_file))

    collection_disabled = config.get("disable_collection", False)
    if not collection_disabled:
        logger.info(
            "Sending version information to singer.io. "
            "To disable sending anonymous usage data, set "
            'the config parameter "disable_collection" to true'
        )
        usage_thread = threading.Thread(target=send_usage_stats)
        usage_thread.start()

    text_stream = io.TextIOWrapper(buf, encoding="utf-8")
    state = persist_lines(config, text_stream)
    emit_state(state)
    logger.debug("Exiting normally")
    return state
示例#5
0
    def __init__(
        self,
        project_id,
        reauth=False,
        private_key=None,
        auth_local_webserver=False,
        dialect="standard",
        location=None,
        credentials=None,
        use_bqstorage_api=False,
    ):
        """Resolve credentials and a project ID, then build the clients.

        Credentials are looked up in this order: the ``credentials``
        argument, the module-level ``context`` cache, and finally
        ``auth.get_credentials``. A truthy ``private_key`` or ``reauth``
        always triggers the ``auth.get_credentials`` call, replacing any
        credentials that were passed in or found in the cache.

        Args:
            project_id: Project to use. When ``None``, the ``project_id``
                attribute of ``credentials`` is tried first (if present),
                then the project that came with the resolved credentials.
            reauth: Forwarded to ``auth.get_credentials``; a truthy value
                forces that call.
            private_key: Forwarded to ``auth.get_credentials``; a truthy
                value forces that call.
            auth_local_webserver: Forwarded to ``auth.get_credentials``.
            dialect: Stored on the instance as ``self.dialect``.
            location: Stored on the instance as ``self.location``.
            credentials: Optional ready-made credentials object.
            use_bqstorage_api: Forwarded to ``_make_bqstorage_client``.

        Raises:
            ValueError: If no project ID could be determined from any of the
                sources above.
        """
        global context
        from google.api_core.exceptions import ClientError, GoogleAPIError
        from pandas_gbq import auth

        self.http_error = (ClientError, GoogleAPIError)

        # Plain copies of the constructor arguments.
        self.location = location
        self.dialect = dialect
        self.reauth = reauth
        self.private_key = private_key
        self.auth_local_webserver = auth_local_webserver

        # With no explicit project, adopt the one carried by the supplied
        # credentials object when it exposes a ``project_id`` attribute.
        if project_id is None and hasattr(credentials, "project_id"):
            self.project_id = credentials.project_id
        else:
            self.project_id = project_id

        resolved_credentials = credentials
        fallback_project = None

        # Nothing usable was passed in: fall back to the shared cache.
        if not resolved_credentials:
            resolved_credentials = context.credentials
            fallback_project = context.project

        # An explicit key or re-auth request bypasses whatever we have so
        # far; an empty result from the steps above also lands here.
        if private_key or reauth or not resolved_credentials:
            resolved_credentials, fallback_project = auth.get_credentials(
                private_key=private_key,
                project_id=project_id,
                reauth=reauth,
                auth_local_webserver=auth_local_webserver,
            )
        self.credentials = resolved_credentials

        # Last resort for the project: whatever came with the credentials.
        if self.project_id is None:
            self.project_id = fallback_project
            if self.project_id is None:
                raise ValueError(
                    "Could not determine project ID and one was not supplied."
                )

        # Populate the cache only for the slots that are still empty.
        if context.credentials is None:
            context.credentials = self.credentials
        if context.project is None:
            context.project = self.project_id

        self.client = self.get_client()
        self.bqstorage_client = _make_bqstorage_client(
            use_bqstorage_api, self.credentials
        )

        # The original note cites 5 USD per TB, with the first 1 TB per month
        # free (https://cloud.google.com/bigquery/pricing). The stored value
        # is 5 USD divided by 2**40, i.e. the price of a single byte.
        self.query_price_for_TB = 5.0 / 2 ** 40