def write_to_new_line_json(items, output_path, entity):
    """Write *items* as newline-delimited JSON to <output_path>/<entity>.json.

    For the flat entities ('account', 'property', 'view') *items* is a list
    of dicts and each dict becomes one JSON line.  For every other entity
    *items* is a list of lists of dicts (one inner list per parent resource)
    and is flattened one level before writing.

    Returns:
        (True, filename) on success, (False, None) on any error; the error
        text is appended to the module-level log file via mu.update_log.
    """
    try:
        filename = os.path.join(output_path, entity + '.json')
        with open(filename, 'w') as out:
            # BUG FIX: the original tested the undefined name `k` here,
            # which raised NameError on every call; the intended variable
            # is the `entity` parameter.
            if entity in ['account', 'property', 'view']:
                for item in items:
                    json.dump(item, out)
                    out.write('\n')
            else:
                # nested payload: flatten one level (list of lists of dicts)
                for group in items:
                    for item in group:
                        json.dump(item, out)
                        out.write('\n')
        return True, filename
    except Exception as e:
        mu.update_log(log_file, 'write_to_new_line_json' + str(e))
        return False, None
## config -- fill in before running
# BUG FIX: the original lines `source_account = #accountID string` and
# `dest_view = #ID string` were bare assignments whose values had been
# stripped, i.e. SyntaxErrors.  Restored as explicit empty placeholders.
source_account = ''  # TODO: Google Analytics accountID string
dest_view = ''       # TODO: ID string for the destination view/profile


## functions
def get_service(api_name, api_version, scopes, key_file_location):
    """Build and return an authenticated Google API client for *api_name*.

    Credentials are loaded from the service-account JSON key file at
    *key_file_location* with the requested *scopes*.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location, scopes=scopes)
    service = build(api_name, api_version, credentials=credentials)
    return service


## main
if __name__ == '__main__':
    ## init
    # output_path / service_account / scopes and the mu log helper are
    # module globals defined elsewhere in this file (not visible here).
    log_file = os.path.join(output_path, 'log', 'ga_filters' + '.txt')
    mu.update_log(log_file, 'exe filepath = ' + os.path.realpath(__file__))
    return_value = 1

    ## connect to api
    mu.update_log(log_file, 'connecting to api')
    a_readonly = get_service(api_name='analytics', api_version='v3',
                             scopes=a_readonly_scope,
                             key_file_location=service_account)
    a_edit = get_service(api_name='analytics', api_version='v3',
                         scopes=a_edit_scope,
                         key_file_location=service_account)

    ## get all filter refs for the specified account
    mu.update_log(log_file, 'getting source filters for ' + source_account)
    source_filter_ids = a_readonly.management().filters().list(
        accountId=source_account).execute()
    # Collect the filter ids with a plain list comprehension; the original
    # used the side-effecting `[link_filters.append(...) for ...]`
    # anti-pattern and initialised `link_filters = []` twice.
    link_filters = [item['id'] for item in source_filter_ids.get('items', [])]
def wait_check(query_count, threshold=1900, pause_secs=100):
    """API-quota throttle: pause and reset the counter when it gets high.

    When *query_count* has reached *threshold*, log a message, sleep for
    *pause_secs* seconds and reset the counter to zero.

    BUG FIX: the original assigned `query_count = 0` to its local parameter,
    which never propagated to the caller, and its `== 1900` test was skipped
    whenever the counter jumped past 1900 (the main loop increments by more
    than 1 per iteration).  The comparison is now `>=` and the (possibly
    reset) count is returned; callers should use
    `query_count = wait_check(query_count)`.

    Returns:
        int: 0 if the pause was taken, otherwise *query_count* unchanged.
    """
    if query_count >= threshold:
        mu.update_log(log_file, 'quick snooze')
        time.sleep(pause_secs)
        query_count = 0
    return query_count
# NOTE(review): this chunk has been flattened onto a single line, so the
# original indentation (and therefore block structure) is lost.  The code is
# preserved byte-for-byte below rather than risk a mis-reconstructed rewrite.
# Contents, in order:
#   * a throttle fragment (`if query_count == 1900: ... query_count = 0`)
#     that duplicates the body of wait_check() defined above -- presumably
#     the tail of that function caught at the chunk boundary; confirm
#     against the full file before editing.
#   * the script entry point: builds the ga_etl log path, connects twice to
#     the Analytics v3 API (a_scope / au_scope service objects), loads
#     accounts, properties and views into the `entities` dict (initialised
#     elsewhere -- not visible in this chunk), bumps query_count by 3, then
#     iterates accounts appending each account's filter list to
#     entities['account_filter'].
#   * `query_count` bookkeeping suggests wait_check() is meant to be called
#     inside the account loop for quota throttling -- TODO confirm in the
#     full file.
if query_count == 1900: mu.update_log(log_file, 'quick snooze') time.sleep(100) query_count = 0 ## main if __name__ == '__main__': ## initialise log_file = os.path.join(output_path,'log', 'ga_etl' + '.txt') return_value= 1 files_to_upload = [] query_count = 0 ## connects to api mu.update_log(log_file, 'connecting to api service') a_serv = get_service(api_name='analytics', api_version='v3', scopes=a_scope, key_file_location=service_account) au_serv = get_service(api_name='analytics', api_version='v3', scopes=au_scope, key_file_location=service_account) ## extracts accounts, properties & views mu.update_log(log_file, 'extracting accounts, properties & views') entities['account'] = (a_serv.management().accounts().list().execute()).get('items',[]) entities['property'] = (a_serv.management().webproperties().list(accountId='~all').execute()).get('items',[]) entities['view'] = (a_serv.management().profiles().list(accountId='~all',webPropertyId='~all').execute()).get('items',[]) query_count += 3 ## iterates over accounts mu.update_log(log_file, 'iterate over accounts') for a in entities['account']: ## populates account_filter & account_user entities['account_filter'].append((a_serv.management().filters().list(accountId=a['id']).execute()).get('items',[]))
# NOTE(review): flattened chunk -- original indentation lost; code preserved
# byte-for-byte below.  It opens MID-DEFINITION: `fields = () return
# bq.SchemaField(...)` is the tail of a Schema-class method whose `def` line
# lies outside this view; it builds a google-cloud-bigquery SchemaField from
# a schema dict (name/type/mode keys, empty nested fields).  The entry point
# then: creates GCS and BigQuery clients, downloads the etl config JSON blob
# from gcs_bucket/gcs_path (both module globals not visible here), and for
# each task imports its schema file via the Schema class (defined elsewhere)
# and prepares a LoadJobConfig with the task's write_disposition.  The chunk
# is also cut off at the end, before the load job is submitted -- do not
# assume the loop body shown here is complete.
fields = () return bq.SchemaField(name=field_dict['name'], field_type=field_dict['type'], mode=field_dict['mode'], fields=fields) ## main if __name__ == '__main__': ## initialise gcs_client = storage.Client() bq_client = bq.Client() log_file = os.path.join(output_path, 'log', 'etl_gcs_to_bq' + '.txt') ## import etl config mu.update_log(log_file, 'importing etl config') bucket = gcs_client.get_bucket(gcs_bucket) blob = bucket.get_blob(gcs_path + etl_config) task_config = json.loads(blob.download_as_string()) ## iterate over task_config for task in task_config: mu.update_log(log_file, 'importing schema ' + task['destination_table']) job_schema = Schema(gcs_bucket, gcs_path + task['schema_file']) job_schema.process_schema() mu.update_log(log_file, 'running import for ' + task['destination_table']) dataset_ref = bq_client.dataset(task['dataset']) job_config = bq.LoadJobConfig() job_config.write_disposition = task['write_disposition']
# NOTE(review): flattened chunk -- original indentation lost; code preserved
# byte-for-byte below.  It opens MID-DEFINITION: the
# `Filter.__init__(self,data)` call plus the `advancedDetails` assignment is
# the tail of an advanced-filter subclass `__init__` whose `def` (and the
# Filter base class) lie outside this view.  After that: a duplicate of the
# get_service() helper seen earlier in this file, then a `__main__` block
# that connects read-only and edit Analytics v3 services, fetches all
# filters for `source_account`, and begins building `dest_filters` from the
# EXCLUDE-type source filters.  The chunk is cut off inside that final `if`,
# so the filter-copy logic is incomplete here -- presumably the loop wraps
# each filter dict in a type-specific class (cf. the advancedDetails
# fragment above); verify against the full file before editing.
Filter.__init__(self,data) self.advancedDetails = data['advancedDetails'] ## functions def get_service(api_name, api_version, scopes, key_file_location): credentials = ServiceAccountCredentials.from_json_keyfile_name(key_file_location, scopes=scopes) service = build(api_name, api_version, credentials=credentials) return service ## main if __name__ == '__main__': ## init log_file = os.path.join(output_path,'log','ga_filters' + '.txt') mu.update_log(log_file, 'exe filepath = ' + os.path.realpath(__file__)) return_value = 1 ## connect to api mu.update_log(log_file, 'connecting to api') a_readonly = get_service(api_name='analytics', api_version='v3', scopes=a_readonly_scope, key_file_location=service_account) a_edit = get_service(api_name='analytics', api_version='v3', scopes=a_edit_scope, key_file_location=service_account) ## get source account filters mu.update_log(log_file, 'getting source filters for ' + source_account) source_filters = a_readonly.management().filters().list(accountId=source_account).execute() dest_filters = [] ## populate the destination filter array for source_filter in source_filters.get('items',[]): if source_filter['type'] == 'EXCLUDE':