def createAnalyticsReport():
    # The real code that initializes the client
    credentials = client.GoogleCredentials(access_token=ACCESS_TOKEN,
                                           refresh_token=REFRESH_TOKEN,
                                           client_id=CLIENT_ID,
                                           client_secret=CLIENT_SECRET,
                                           token_uri=TOKEN_URI,
                                           token_expiry=TOKEN_EXPIRY,
                                           user_agent=USER_AGENT)

    # Initialize the HTTP transport
    http = lib2.Http()

    # Authorize the client
    authorized = credentials.authorize(http)

    # API name and version; these don't change until
    # a new API version is released for us to play with.
    api_name = 'analyticsreporting'
    api_version = 'v4'

    # Let's build the client
    analytics = google_build(serviceName=api_name,
                             version=api_version,
                             http=authorized)
    return analytics
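# The aliases used throughout these snippets (`client`, `lib2`, `google_build`,
# `GoogleCredentials`) are assumed here to come from oauth2client, httplib2 and
# google-api-python-client; a plausible import header would be:
import json
import httplib2 as lib2
from oauth2client import client
from oauth2client.client import GoogleCredentials
from googleapiclient.discovery import build as google_build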
def create_table(project_id, table_id, dataset_id, debug=False):
    """Create a BigQuery table using a schema compatible with Parse.ly events

    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param debug: If True, print the table insert request body before sending it
    :type debug: bool
    """
    fields = mk_bigquery_schema()
    schema = {
        "description": "Parse.ly Data Pipeline",
        "schema": {"fields": fields},
        "tableReference": {
            "projectId": project_id,
            "tableId": table_id,
            "datasetId": dataset_id
        }
    }
    if debug:
        print("Running the following BigQuery JSON table insert:")
        print(json.dumps(schema, indent=4, sort_keys=True))
    credentials = GoogleCredentials.get_application_default()
    bigquery = google_build('bigquery', 'v2', credentials=credentials)
    bigquery.tables().insert(projectId=project_id,
                             datasetId=dataset_id,
                             body=schema).execute()
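# Usage sketch for create_table() (the project/dataset/table IDs below are
# placeholders, not values from the original source); debug=True prints the
# generated table resource before the insert is issued.
create_table(project_id="my-gcp-project",
             table_id="parsely_events",
             dataset_id="parsely_pipeline",
             debug=True)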
def __init__(self):
    super().__init__()
    self.svc = google_build(
        'customsearch', 'v1',
        developerKey='redacted'
    )
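# Usage sketch for the Custom Search client built above (assumptions: `searcher`
# is an instance of the enclosing class, and the query string and `cx` search
# engine ID are placeholders). cse().list() returns one result page as a dict.
results = searcher.svc.cse().list(q="example query",
                                  cx="your-search-engine-id").execute()
for item in results.get("items", []):
    print(item["title"], item["link"])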
def copy_from_s3(network, s3_prefix="", access_key_id="", secret_access_key="",
                 region_name="us-east-1", project_id=None, dataset_id=None,
                 table_id=None, dry_run=False):
    """Load events from S3 to BigQuery using the BQ streaming insert API.

    :param network: The Parse.ly network for which to perform writes
        (eg "parsely-blog")
    :type network: str
    :param s3_prefix: The S3 timestamp directory prefix from which to fetch
        data batches, formatted as YYYY/MM/DD
    :type s3_prefix: str
    :param access_key_id: The AWS access key to use when fetching data batches
    :type access_key_id: str
    :param secret_access_key: The AWS secret key to use when fetching data batches
    :type secret_access_key: str
    :param region_name: The AWS region in which to perform fetches
    :type region_name: str
    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dry_run: If True, don't perform BigQuery writes
    :type dry_run: bool
    """
    bq_conn = None
    if not dry_run:
        bq_conn = google_build(
            'bigquery', 'v2',
            credentials=GoogleCredentials.get_application_default())
    s3_stream = events_s3(network, prefix=s3_prefix,
                          access_key_id=access_key_id,
                          secret_access_key=secret_access_key,
                          region_name=region_name)
    schema_compliant_fields = [column['name'] for column in mk_bigquery_schema()]

    def schema_compliant(jsonline):
        return {k: jsonline.get(k, None) for k in schema_compliant_fields}

    def chunked(seq, chunk_size):
        chunk = []
        for item in seq:
            chunk.append(schema_compliant(item))
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        if chunk:
            yield chunk

    for events in chunked(s3_stream, 500):
        streaming_insert_bigquery(events, bq_conn=bq_conn, project_id=project_id,
                                  dataset_id=dataset_id, table_id=table_id)
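# streaming_insert_bigquery() is not shown above; this is only a minimal sketch
# of how such a helper could call the BigQuery v2 tabledata().insertAll() method
# (the name and structure are assumptions, not the original implementation).
def streaming_insert_sketch(events, bq_conn=None, project_id=None,
                            dataset_id=None, table_id=None):
    if bq_conn is None:
        # dry run: no BigQuery connection was built, so skip the write
        return
    body = {"rows": [{"json": event} for event in events]}
    bq_conn.tabledata().insertAll(projectId=project_id,
                                  datasetId=dataset_id,
                                  tableId=table_id,
                                  body=body).execute()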
def load_batch_bigquery(network, s3_prefix="", access_key_id="",
                        secret_access_key="", region_name="us-east-1",
                        project_id=None, dataset_id=None, table_id=None,
                        dry_run=False):
    """Load a batch of events from S3 to BigQuery

    :param network: The Parse.ly network for which to perform writes
        (eg "parsely-blog")
    :type network: str
    :param s3_prefix: The S3 timestamp directory prefix from which to fetch
        data batches, formatted as YYYY/MM/DD
    :type s3_prefix: str
    :param access_key_id: The AWS access key to use when fetching data batches
    :type access_key_id: str
    :param secret_access_key: The AWS secret key to use when fetching data batches
    :type secret_access_key: str
    :param region_name: The AWS region in which to perform fetches
    :type region_name: str
    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dry_run: If True, don't perform BigQuery writes
    :type dry_run: bool
    """
    bq_conn = None
    if not dry_run:
        bq_conn = google_build(
            'bigquery', 'v2',
            credentials=GoogleCredentials.get_application_default())
    s3_stream = events_s3(network, prefix=s3_prefix,
                          access_key_id=access_key_id,
                          secret_access_key=secret_access_key,
                          region_name=region_name)

    def chunked(seq, chunk_size):
        chunk = []
        for item in seq:
            chunk.append(item)
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        # yield the final partial chunk so trailing events are not dropped
        if chunk:
            yield chunk

    for events in chunked(s3_stream, 500):
        write_events_bigquery(events, bq_conn=bq_conn, project_id=project_id,
                              dataset_id=dataset_id, table_id=table_id)
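# Usage sketch for the two loaders above (all argument values are placeholders,
# not from the original source); dry_run=True streams events from S3 but skips
# the BigQuery writes. load_batch_bigquery() takes the same arguments.
copy_from_s3("parsely-blog",
             s3_prefix="2020/01/01",
             access_key_id="AKIA-EXAMPLE",
             secret_access_key="example-secret",
             project_id="my-gcp-project",
             dataset_id="parsely_pipeline",
             table_id="parsely_events",
             dry_run=True)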
refresh_token = config.refresh_token
client_id = config.client_id
client_secret = config.client_secret

credentials = google.oauth2.credentials.Credentials(
    None,
    refresh_token=refresh_token,
    token_uri=token_uri,
    client_id=client_id,
    client_secret=client_secret)

api_name = 'analyticsreporting'
api_version = 'v4'
api_client = google_build(serviceName=api_name,
                          version=api_version,
                          credentials=credentials)

sample_request = {
    'viewId': viewId,
    'dateRanges': {
        'startDate': theepoch,
        'endDate': theepoch
    },
    'dimensions': [{
        'name': 'ga:campaign'
    }, {
        'name': 'ga:deviceCategory'
    }, {
        'name': 'ga:city'
def pull_data(last_date):
    access_token = new_access_code()
    credentials = client.GoogleCredentials(
        access_token=access_token,
        refresh_token=refresh_token,
        client_id=client_id,
        client_secret=CLIENT_SECRETS_FILE,
        token_uri="https://www.googleapis.com/oauth2/v4/token",
        token_expiry=token_expiry,
        user_agent=user_agent)
    http = lib2.Http()

    # Authorize the client
    authorized = credentials.authorize(http)

    # Let's build the client
    api_client = google_build(serviceName=API_SERVICE_NAME,
                              version=API_VERSION,
                              http=authorized)

    # Specify which data you want to pull from Google Analytics
    sample_request = {
        'viewId': '83705367',
        'dateRanges': {
            'startDate': last_date,
            'endDate': datetime.strftime(datetime.now() - timedelta(days=1),
                                         '%Y-%m-%d')
        },
        'dimensions': [{
            'name': 'ga:adContent'
        }, {
            'name': 'ga:date'
        }, {
            'name': 'ga:campaign'
        }],
        'metrics': [{
            'expression': 'ga:users'
        }, {
            'expression': 'ga:newUsers'
        }, {
            'expression': 'ga:sessions'
        }, {
            'expression': 'ga:bounceRate'
        }, {
            'expression': 'ga:pageviewsPerSession'
        }, {
            'expression': 'ga:avgSessionDuration'
        }, {
            'expression': 'ga:goal1ConversionRate'
        }, {
            'expression': 'ga:goal1Completions'
        }, {
            'expression': 'ga:goal1Value'
        }],
        'filtersExpression': 'ga:adContent=@_;ga:adContent!@GDN;ga:campaign!=id',
        'orderBys': {
            'fieldName': 'ga:date',
            'sortOrder': 'ASCENDING'
        },
        'pageSize': 100000,
        'includeEmptyRows': True
    }

    response = api_client.reports().batchGet(body={
        'reportRequests': sample_request
    }).execute()
    return response
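# Sketch of reading the batchGet response returned by pull_data() (assumptions:
# a single report in the standard Reporting API v4 shape; the helper name is
# illustrative, not from the original source).
def rows_from_response(response):
    report = response['reports'][0]
    header = report['columnHeader']
    dimensions = header.get('dimensions', [])
    metrics = [m['name'] for m in header['metricHeader']['metricHeaderEntries']]
    for row in report['data'].get('rows', []):
        # one dict of dimension values and one of metric values per row
        yield (dict(zip(dimensions, row['dimensions'])),
               dict(zip(metrics, row['metrics'][0]['values'])))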
def create_bigquery_table(project_id, table_id, dataset_id):
    """Create a BigQuery table using a schema compatible with Parse.ly events

    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    """
    schema = {
        "description": "Parse.ly event data",
        "schema": {
            "fields": [
                {"name": "url", "mode": "REQUIRED", "type": "STRING"},
                {"name": "apikey", "mode": "REQUIRED", "type": "STRING"},
                {"name": "action", "mode": "NULLABLE", "type": "STRING"},
                {"name": "display_avail_height", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "display_avail_width", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "display_pixel_depth", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "display_total_height", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "display_total_width", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "engaged_time_inc", "mode": "NULLABLE", "type": "INTEGER"},
                {"name": "extra_data", "mode": "NULLABLE", "type": "STRING"},
                {"name": "referrer", "mode": "NULLABLE", "type": "STRING"},
                {"name": "session_id", "mode": "NULLABLE", "type": "STRING"},
                {"name": "session_initial_referrer", "mode": "NULLABLE", "type": "STRING"},
                {"name": "session_initial_url", "mode": "NULLABLE", "type": "STRING"},
                {"name": "session_last_session_timestamp", "mode": "NULLABLE", "type": "TIMESTAMP"},
                {"name": "session_timestamp", "mode": "NULLABLE", "type": "TIMESTAMP"},
                {"name": "timestamp_info_nginx_ms", "mode": "NULLABLE", "type": "TIMESTAMP"},
                {"name": "timestamp_info_override_ms", "mode": "NULLABLE", "type": "TIMESTAMP"},
                {"name": "timestamp_info_pixel_ms", "mode": "NULLABLE", "type": "TIMESTAMP"},
                {"name": "user_agent", "mode": "NULLABLE", "type": "STRING"},
                {"name": "visitor_ip", "mode": "NULLABLE", "type": "STRING"},
                {"name": "visitor_network_id", "mode": "NULLABLE", "type": "STRING"},
                {"name": "visitor_site_id", "mode": "NULLABLE", "type": "STRING"}
            ]
        },
        "tableReference": {
            "projectId": project_id,
            "tableId": table_id,
            "datasetId": dataset_id
        }
    }
    credentials = GoogleCredentials.get_application_default()
    bigquery = google_build('bigquery', 'v2', credentials=credentials)
    # The table ID is carried in the body's tableReference; tables().insert()
    # itself takes only projectId and datasetId.
    bigquery.tables().insert(projectId=project_id,
                             datasetId=dataset_id,
                             body=schema).execute()