def request_and_upload(tile, folder=None, **context):
    """Pull a Moat tile report for the run date, upload it to GCS, and
    return the gs:// URI of the uploaded file."""
    print(type(tile))
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')
    gcs = CloudStorage(credentialsFromVault)
    start_date = context['ds_nodash']
    end_date = context['ds_nodash']
    print(start_date)
    filename = tile.get_data(start_date, end_date, moat_token)
    # Nest the file under the optional folder; fall back to the bare filename
    # so blob_name is always defined.
    if folder:
        blob_name = folder + "/" + filename
    else:
        blob_name = filename
    dest_bucket = "rtf_staging"
    gcs.upload_blob(dest_bucket, blob_name, filename, mode='filename')
    gcs_uri = "gs://" + dest_bucket + "/" + blob_name
    print("File Uploaded to {}".format(gcs_uri))
    os.remove(filename)
    print("{} Removed Locally".format(filename))
    return gcs_uri

def moat_report_extract(tile_id, bucket_name, folder, **context):
    """Pull yesterday's data for a Moat tile and upload it to GCS.

    Returns the uploaded blob so downstream tasks can locate the file.
    """
    yesterday = context['yesterday_ds_nodash']  # YYYYMMDD
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    logging.info("Loaded Credentials")
    moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')
    logging.info("Loaded Token")
    filters = context.get('level_filters')
    dimensions = context.get('dimensions')
    tile = MoatTile(tile_id=tile_id, level_filters=filters, dimensions=dimensions)
    logging.info("Tile Instantiated")
    local_filename = tile.get_data(yesterday, yesterday, moat_token)
    logging.info("Data Stored {}".format(local_filename))
    gcs = CloudStorage(credentialsFromVault)
    logging.info("Upload to GCS")
    dest_blob_name = folder + "/" + local_filename
    blob = gcs.upload_blob(bucket_name=bucket_name,
                           destination_blob_name=dest_blob_name,
                           source=local_filename,
                           mode='filename')
    return blob

def gcs_to_bq(pull_id, dest_table, mode, ext, **context):
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    bq = BigQuery(credentialsFromVault)
    gcs_uri = context['ti'].xcom_pull(task_ids=pull_id)
    print(gcs_uri)
    _ = bq.load_from_gcs('RTF_DWH_Moat', gcs_uri, dest_table, mode, ext)

def clean_up(bucket, folder, **context):
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    gcs = CloudStorage(credentialsFromVault)
    blobs = gcs.list_blobs(bucket_name=bucket, prefix=folder)
    for blob in blobs:
        blob.delete()
    logging.info("Blobs Cleaned")

def clean_up(pull_task_id, **context):
    """Delete the GCS blob produced by the upstream extract task."""
    blob_bucket_name, blob_name = context['ti'].xcom_pull(task_ids=pull_task_id)
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    gcs = CloudStorage(credentialsFromVault)
    gcs.delete_blob(blob_bucket_name, blob_name)

def dfa_report_load(pull_task_id, dataset_table, schema=None, **context):
    """Load the GCS file referenced by the upstream task's XCom into BigQuery."""
    blob_bucket_name, blob_name = context['ti'].xcom_pull(task_ids=pull_task_id)
    file_uri = "gs://" + blob_bucket_name + "/" + blob_name
    print("Get Creds from Vault")
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    dataset_id = dataset_table.split(".")[0]
    dest_table = dataset_table.split(".")[1]
    print("Auth BQ")
    bq = BigQuery(credentialsFromVault)
    print("Load to BQ")
    # Forward the optional schema so a caller-supplied schema isn't silently ignored.
    bq.load_from_gcs(dataset_id, file_uri, dest_table, mode='Append', schema=schema)

def dfa_report_extract(report_id, **context):
    """Download a DFA/DCM report, clean it, upload it to GCS, and return
    (bucket_name, blob_name) for downstream tasks."""
    if context.get('execution_date'):
        # Report on execution date minus one day (and 6 hours, since Airflow runs in UTC).
        execution_date = context['execution_date']
        reporting_datetime = (execution_date - timedelta(days=1, hours=6)).strftime('%Y-%m-%d')
        start_date = end_date = reporting_datetime
    else:
        start_date = context['start_date']
        end_date = context['end_date']
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    local_filename = get_dfa_report(credentialsFromVault, report_id, start_date, end_date)
    print("Cleaning File")
    clean_dcm_file(local_filename)
    print("Auth GCS")
    gcs = CloudStorage(credentialsFromVault)
    folder = "brand_reporting/"
    if context.get('execution_date'):
        destination_blob_name = folder + reporting_datetime + "_" + local_filename
    else:
        destination_blob_name = folder + end_date + "_" + local_filename
    print("Upload File")
    gcs.upload_blob(bucket_name, destination_blob_name, local_filename, mode='filename')
    stored_blob = gcs.get_blob(bucket_name, destination_blob_name)
    print("Clean Up Local")
    os.remove(local_filename)
    return (stored_blob.bucket.name, stored_blob.name)

def load_bq(tile_id, file_uri, table, **context):
    """Load a Moat tile file from GCS into a date-suffixed BigQuery table."""
    yesterday = context['yesterday_ds_nodash']
    schema = moat_schema_dict.get(tile_id)
    if schema:
        logging.info("Schema Found")
        credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
            service_account_email)
        bq = BigQuery(credentialsFromVault)
        logging.info("Build BQ Job")
        resp = bq.load_from_gcs("rtf_brand_reporting",
                                file_uri,
                                "{}_{}".format(table, yesterday),
                                schema=schema,
                                extension='json')
        logging.info("START JOB: {}".format(resp.job_id))
        resp.result()  # block until the load job finishes
        logging.info("JOB COMPLETE: {}".format(resp.job_id))
    else:
        logging.warning("No schema found for tile_id {}; skipping load".format(tile_id))
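

# ---------------------------------------------------------------------------
# Example wiring (a minimal sketch, not part of the original module): how two
# of the callables above could be chained in an Airflow 1.x DAG so that
# gcs_to_bq picks up the gs:// URI that request_and_upload pushes to XCom.
# The DAG id, schedule, tile settings, and table name below are hypothetical
# placeholders, not values taken from this codebase.
# ---------------------------------------------------------------------------
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

example_dag = DAG(
    dag_id='moat_reporting_example',      # hypothetical DAG id
    schedule_interval='@daily',
    start_date=datetime(2019, 1, 1),
    catchup=False,
)

upload = PythonOperator(
    task_id='request_and_upload',
    python_callable=request_and_upload,
    op_kwargs={'tile': MoatTile(tile_id=1234,            # hypothetical tile
                                level_filters=None,
                                dimensions=None),
               'folder': 'moat'},
    provide_context=True,                 # exposes ds_nodash etc. to the callable
    dag=example_dag,
)

to_bq = PythonOperator(
    task_id='gcs_to_bq',
    python_callable=gcs_to_bq,
    op_kwargs={'pull_id': 'request_and_upload',          # task to xcom_pull from
               'dest_table': 'moat_tile_daily',          # hypothetical table
               'mode': 'Append',
               'ext': 'json'},
    provide_context=True,
    dag=example_dag,
)

upload >> to_bq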