def createAnalyticsReport():

    #The real code that initializes the client
    credentials = client.GoogleCredentials(access_token=ACCESS_TOKEN,
                                           refresh_token=REFRESH_TOKEN,
                                           client_id=CLIENT_ID,
                                           client_secret=CLIENT_SECRET,
                                           token_uri=TOKEN_URI,
                                           token_expiry=TOKEN_EXPIRY,
                                           user_agent=USER_AGENT)
    #Initialize the HTTP transport
    http = lib2.Http()

    #Authorize client
    authorized = credentials.authorize(http)

    #API name and version; these don't change until
    #Google releases a new API version for us to play with.
    api_name = 'analyticsreporting'
    api_version = 'v4'

    #Let's build the client
    analytics = google_build(serviceName=api_name,
                             version=api_version,
                             http=authorized)
    return analytics
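
None of these snippets show their imports. A minimal sketch of what they would likely be, inferred from the aliases used throughout (google_build, client, lib2); treat these as assumptions:

#Assumed imports, reconstructed from how the names are used in these examples
from googleapiclient.discovery import build as google_build  #google_build(...)
from oauth2client import client                    #client.GoogleCredentials(...)
from oauth2client.client import GoogleCredentials  #get_application_default()
import httplib2 as lib2                            #lib2.Http()
import json                                        #json.dumps(...) in Example #2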
Example #2
def create_table(project_id, table_id, dataset_id, debug=False):
    """Create a BigQuery table using a schema compatible with Parse.ly events

    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param debug: If True, print the table insert request body before sending it
    :type debug: bool
    """
    fields = mk_bigquery_schema()
    schema = {
        "description": "Parse.ly Data Pipeline",
        "schema": {"fields": fields},
        "tableReference": {
            "projectId": project_id,
            "tableId": table_id,
            "datasetId": dataset_id
        }
    }
    if debug:
        print("Running the following BigQuery JSON table insert:")
        print(json.dumps(schema, indent=4, sort_keys=True))
    credentials = GoogleCredentials.get_application_default()
    bigquery = google_build('bigquery', 'v2', credentials=credentials)
    bigquery.tables().insert(projectId=project_id,
                             datasetId=dataset_id,
                             body=schema).execute()
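
A hypothetical invocation; the project, dataset, and table names below are placeholders, and get_application_default() needs application-default credentials configured (for example via the GOOGLE_APPLICATION_CREDENTIALS environment variable):

create_table('my-project', 'rawevents', 'parsely', debug=True)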
Example #3
	def __init__(self):
		super().__init__()
		self.svc = google_build(
			'customsearch',
			'v1',
			developerKey='redacted'
		)
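
A sketch of how the service built above might be used; cse().list() is the Custom Search query method, and the cx engine ID below is a placeholder:

#Hypothetical usage, assuming an instance of the class above
results = instance.svc.cse().list(q='example query', cx='ENGINE_ID').execute()
for item in results.get('items', []):
    print(item['title'], item['link'])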
Example #4
def copy_from_s3(network,
                 s3_prefix="",
                 access_key_id="",
                 secret_access_key="",
                 region_name="us-east-1",
                 project_id=None,
                 dataset_id=None,
                 table_id=None,
                 dry_run=False):
    """Load events from S3 to BigQuery using the BQ streaming insert API.

    :param network: The Parse.ly network for which to perform writes (eg
        "parsely-blog")
    :type network: str
    :param s3_prefix: The S3 timestamp directory prefix from which to fetch data
        batches, formatted as YYYY/MM/DD
    :type s3_prefix: str
    :param access_key_id: The AWS access key to use when fetching data batches
    :type access_key_id: str
    :param secret_access_key: The AWS secret key to use when fetching data batches
    :type secret_access_key: str
    :param region_name: The AWS region in which to perform fetches
    :type region_name: str
    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dry_run: If True, don't perform BigQuery writes
    :type dry_run: bool
    """
    bq_conn = None
    if not dry_run:
        bq_conn = google_build(
            'bigquery', 'v2',
            credentials=GoogleCredentials.get_application_default())
    s3_stream = events_s3(network, prefix=s3_prefix, access_key_id=access_key_id,
                          secret_access_key=secret_access_key,
                          region_name=region_name)

    schema_compliant_fields = [column['name'] for column in mk_bigquery_schema()]

    def schema_compliant(jsonline):
        return {k: jsonline.get(k, None) for k in schema_compliant_fields}

    def chunked(seq, chunk_size):
        chunk = []
        for item in seq:
            chunk.append(schema_compliant(item))
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        if chunk:
            yield chunk

    for events in chunked(s3_stream, 500):
        streaming_insert_bigquery(events, bq_conn=bq_conn, project_id=project_id,
                                  dataset_id=dataset_id, table_id=table_id)
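
The chunk size of 500 matches BigQuery's documented recommendation of at most 500 rows per streaming insert request. A hypothetical dry-run invocation (every identifier below is a placeholder):

copy_from_s3('parsely-blog',
             s3_prefix='2023/01/15',
             access_key_id='AKIA...',
             secret_access_key='...',
             project_id='my-project',
             dataset_id='parsely',
             table_id='rawevents',
             dry_run=True)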
Example #5
def load_batch_bigquery(network,
                        s3_prefix="",
                        access_key_id="",
                        secret_access_key="",
                        region_name="us-east-1",
                        project_id=None,
                        dataset_id=None,
                        table_id=None,
                        dry_run=False):
    """Load a batch of events from S3 to BigQuery

    :param network: The Parse.ly network for which to perform writes (eg
        "parsely-blog")
    :type network: str
    :param s3_prefix: The S3 timestamp directory prefix from which to fetch data
        batches, formatted as YYYY/MM/DD
    :type s3_prefix: str
    :param access_key_id: The AWS access key to use when fetching data batches
    :type access_key_id: str
    :param secret_access_key: The AWS secret key to use when fetching data batches
    :type secret_access_key: str
    :param region_name: The AWS region in which to perform fetches
    :type region_name: str
    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dry_run: If True, don't perform BigQuery writes
    :type dry_run: bool
    """
    bq_conn = None
    if not dry_run:
        bq_conn = google_build(
            'bigquery',
            'v2',
            credentials=GoogleCredentials.get_application_default())
    s3_stream = events_s3(network,
                          prefix=s3_prefix,
                          access_key_id=access_key_id,
                          secret_access_key=secret_access_key,
                          region_name=region_name)

    def chunked(seq, chunk_size):
        chunk = []
        for item in seq:
            chunk.append(item)
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        #Flush the trailing partial chunk so the last events aren't dropped
        if chunk:
            yield chunk

    for events in chunked(s3_stream, 500):
        write_events_bigquery(events,
                              bq_conn=bq_conn,
                              project_id=project_id,
                              dataset_id=dataset_id,
                              table_id=table_id)
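
write_events_bigquery (like streaming_insert_bigquery in Example #4) is not shown in this listing. A minimal sketch of what such a helper could look like on top of the BigQuery v2 streaming API (tabledata().insertAll); the helper name and the assumption that each event is already a JSON-compatible dict are mine:

def write_events_bigquery_sketch(events, bq_conn=None, project_id=None,
                                 dataset_id=None, table_id=None):
    #Dry-run mode: no connection was built, so skip the write
    if bq_conn is None:
        return
    body = {"rows": [{"json": event} for event in events]}
    bq_conn.tabledata().insertAll(projectId=project_id,
                                  datasetId=dataset_id,
                                  tableId=table_id,
                                  body=body).execute()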
Example #6
refresh_token = config.refresh_token
client_id = config.client_id
client_secret = config.client_secret
#Google's OAuth2 token endpoint (the same URI used in Example #7 below)
token_uri = 'https://www.googleapis.com/oauth2/v4/token'

credentials = google.oauth2.credentials.Credentials(
    None,
    refresh_token=refresh_token,
    token_uri=token_uri,
    client_id=client_id,
    client_secret=client_secret)

api_name = 'analyticsreporting'
api_version = 'v4'

api_client = google_build(serviceName=api_name,
                          version=api_version,
                          credentials=credentials)

sample_request = {
    'viewId': viewId,
    #dateRanges is a repeated field, so it takes a list
    'dateRanges': [{
        'startDate': theepoch,
        'endDate': theepoch
    }],
    'dimensions': [{
        'name': 'ga:campaign'
    }, {
        'name': 'ga:deviceCategory'
    }, {
        'name': 'ga:city'
Example #7
def pull_data(last_date):
    access_token = new_access_code()
    credentials = client.GoogleCredentials(
        access_token=access_token,
        refresh_token=refresh_token,
        client_id=client_id,
        client_secret=CLIENT_SECRETS_FILE,
        token_uri="https://www.googleapis.com/oauth2/v4/token",
        token_expiry=token_expiry,
        user_agent=user_agent)
    http = lib2.Http()

    #Authorize client
    authorized = credentials.authorize(http)

    #Let's build the client
    api_client = google_build(serviceName=API_SERVICE_NAME,
                              version=API_VERSION,
                              http=authorized)

    #Specify which data you want to pull from Google Analytics

    sample_request = {
        'viewId': '83705367',
        #dateRanges is a repeated field, so it takes a list
        'dateRanges': [{
            'startDate': last_date,
            'endDate': datetime.strftime(datetime.now() - timedelta(days=1),
                                         '%Y-%m-%d')
        }],
        'dimensions': [{
            'name': 'ga:adContent'
        }, {
            'name': 'ga:date'
        }, {
            'name': 'ga:campaign'
        }],
        'metrics': [{
            'expression': 'ga:users'
        }, {
            'expression': 'ga:newUsers'
        }, {
            'expression': 'ga:sessions'
        }, {
            'expression': 'ga:bounceRate'
        }, {
            'expression': 'ga:pageviewsPerSession'
        }, {
            'expression': 'ga:avgSessionDuration'
        }, {
            'expression': 'ga:goal1ConversionRate'
        }, {
            'expression': 'ga:goal1Completions'
        }, {
            'expression': 'ga:goal1Value'
        }],
        'filtersExpression': 'ga:adContent=@_;ga:adContent!@GDN;ga:campaign!=id',
        #orderBys is a repeated field, so it takes a list
        'orderBys': [{
            'fieldName': 'ga:date',
            'sortOrder': 'ASCENDING'
        }],
        'pageSize': 100000,
        'includeEmptyRows': True
    }
    #reportRequests is also a repeated field; wrap the request in a list
    response = api_client.reports().batchGet(body={
        'reportRequests': [sample_request]
    }).execute()
    return response
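
A sketch of how the batchGet response could be unpacked, following the Analytics Reporting API v4 response shape (reports -> data -> rows, each row carrying dimension values and metric values):

report = response['reports'][0]
for row in report['data'].get('rows', []):
    dims = row['dimensions']              #e.g. [adContent, date, campaign]
    values = row['metrics'][0]['values']  #one value per requested metric
    print(dims, values)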
Example #8
def create_bigquery_table(project_id, table_id, dataset_id):
    """Create a BigQuery table using a schema compatible with Parse.ly events

    :param project_id: The BigQuery project ID to write to
    :type project_id: str
    :param table_id: The BigQuery table ID to write to
    :type table_id: str
    :param dataset_id: The BigQuery dataset ID to write to
    :type dataset_id: str
    """
    schema = {
        "description": "Parse.ly event data",
        "schema": {
            "fields": [{
                "name": "url",
                "mode": "REQUIRED",
                "type": "STRING"
            }, {
                "name": "apikey",
                "mode": "REQUIRED",
                "type": "STRING"
            }, {
                "name": "action",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "display_avail_height",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "display_avail_width",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "display_pixel_depth",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "display_total_height",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "display_total_width",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "engaged_time_inc",
                "mode": "NULLABLE",
                "type": "INTEGER"
            }, {
                "name": "extra_data",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "referrer",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "session_id",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "session_initial_referrer",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "session_initial_url",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "session_last_session_timestamp",
                "mode": "NULLABLE",
                "type": "TIMESTAMP"
            }, {
                "name": "session_timestamp",
                "mode": "NULLABLE",
                "type": "TIMESTAMP"
            }, {
                "name": "timestamp_info_nginx_ms",
                "mode": "NULLABLE",
                "type": "TIMESTAMP"
            }, {
                "name": "timestamp_info_override_ms",
                "mode": "NULLABLE",
                "type": "TIMESTAMP"
            }, {
                "name": "timestamp_info_pixel_ms",
                "mode": "NULLABLE",
                "type": "TIMESTAMP"
            }, {
                "name": "user_agent",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "visitor_ip",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "visitor_network_id",
                "mode": "NULLABLE",
                "type": "STRING"
            }, {
                "name": "visitor_site_id",
                "mode": "NULLABLE",
                "type": "STRING"
            }]
        },
        "tableReference": {
            "projectId": project_id,
            "tableId": table_id,
            "datasetId": dataset_id
        }
    }
    credentials = GoogleCredentials.get_application_default()
    bigquery = google_build('bigquery', 'v2', credentials=credentials)
    #tables().insert takes only projectId and datasetId; the table ID is
    #already carried in the body's tableReference
    bigquery.tables().insert(projectId=project_id,
                             datasetId=dataset_id,
                             body=schema).execute()