Example #1
def request_and_upload(tile, folder=None, **context):
    print(type(tile))
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')

    gcs = CloudStorage(credentialsFromVault)

    # Airflow execution date as YYYYMMDD; pull a single day
    start_date = context['ds_nodash']
    end_date = context['ds_nodash']
    print(start_date)

    filename = tile.get_data(start_date, end_date, moat_token)

    if folder:
        blob_name = folder + "/" + filename
    else:
        blob_name = filename

    dest_bucket = "rtf_staging"

    gcs.upload_blob(dest_bucket, blob_name, filename, mode='filename')

    gcs_uri = "gs://" + dest_bucket + "/" + blob_name

    print("File Upload to {}".format(gcs_uri))

    os.remove(filename)
    print("{} Removed Locally".format(filename))

    return gcs_uri
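
These callables read Airflow context keys (ds_nodash, ti, yesterday_ds_nodash), so they are evidently meant to run as PythonOperator tasks. A minimal wiring sketch, assuming Airflow 1.x-style provide_context and that it lives in the same module as request_and_upload and MoatTile; the DAG id, schedule, tile id, filters and folder below are illustrative, not from the source:

# Hypothetical wiring sketch, assuming Airflow 1.x where provide_context=True
# passes the template context (ds_nodash, ti, ...) into **context.
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

with DAG(dag_id="moat_to_gcs",
         start_date=datetime(2020, 1, 1),
         schedule_interval="@daily") as dag:

    tile = MoatTile(tile_id=2698,              # illustrative values
                    level_filters=["level1"],
                    dimensions=["date"])

    upload = PythonOperator(
        task_id="request_and_upload",
        python_callable=request_and_upload,
        op_kwargs={"tile": tile, "folder": "moat"},
        provide_context=True,   # merges the Airflow context into **context
    )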
Example #2
def moat_report_extract(tile_id, bucket_name, folder, **context):
    yesterday = context['yesterday_ds_nodash']  # YYYYMMDD
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    logging.info("Loaded Credentials")

    moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')
    logging.info("Loaded Token")

    # optional report settings pulled from the task's keyword arguments
    filters = context.get('level_filters')
    dimensions = context.get('dimensions')

    tile = MoatTile(tile_id=tile_id, level_filters=filters, dimensions=dimensions)
    logging.info("Tile Instantiated")

    local_filename = tile.get_data(yesterday, yesterday, moat_token)

    logging.info("Data Stored {}".format(local_filename))

    gcs = CloudStorage(credentialsFromVault)

    logging.info("Upload to GCS")

    dest_blob_name = folder + "/" + local_filename

    blob = gcs.upload_blob(bucket_name=bucket_name,
                           destination_blob_name=dest_blob_name,
                           source=local_filename,
                           mode='filename')

    return blob
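
Because the function only reads the context keys it needs, it can also be smoke-tested outside Airflow by passing those keys directly as keyword arguments; a hypothetical ad-hoc call (all values illustrative):

# Hypothetical local invocation (not from the source): the keyword arguments
# stand in for the Airflow context / op_kwargs the task would normally receive.
blob = moat_report_extract(
    tile_id=2698,                    # illustrative tile id
    bucket_name="rtf_staging",
    folder="moat",
    yesterday_ds_nodash="20200101",  # stands in for Airflow's yesterday_ds_nodash
    level_filters=["level1"],        # illustrative
    dimensions=["date"],             # illustrative
)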
Example #3
def gcs_to_bq(pull_id, dest_table, mode, ext, **context):
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    bq = BigQuery(credentialsFromVault)

    # GCS URI returned (and pushed to XCom) by the upstream extract/upload task
    gcs_uri = context['ti'].xcom_pull(task_ids=pull_id)
    print(gcs_uri)

    _ = bq.load_from_gcs('RTF_DWH_Moat', gcs_uri, dest_table, mode, ext)
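
request_and_upload returns gcs_uri, so when the two callables run as chained tasks the URI travels between them through XCom: gcs_to_bq pulls it back with xcom_pull(task_ids=pull_id). A hypothetical pairing (task ids, table name, mode and extension are illustrative; assumes the DAG context from the sketch under Example #1):

# Hypothetical two-task chain (not from the source).
from airflow.operators.python_operator import PythonOperator

extract = PythonOperator(
    task_id="moat_extract_upload",
    python_callable=request_and_upload,
    op_kwargs={"tile": tile, "folder": "moat"},
    provide_context=True,
)

load = PythonOperator(
    task_id="moat_gcs_to_bq",
    python_callable=gcs_to_bq,
    op_kwargs={"pull_id": "moat_extract_upload",   # must match the upstream task_id
               "dest_table": "moat_daily",         # illustrative
               "mode": "Append",
               "ext": "csv"},                      # illustrative
    provide_context=True,
)

extract >> load   # the returned gcs_uri is pushed to XCom and pulled by gcs_to_bq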
Example #4
def clean_up(bucket, folder, **context):
    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    gcs = CloudStorage(credentialsFromVault)

    blobs = gcs.list_blobs(bucket_name=bucket, prefix=folder)

    for blob in blobs:
        blob.delete()
    
    logging.info("Blobs Cleaned")
Example #5
def clean_up(pull_task_id, **context):
    """
    move file to gcs processed folder
    """
    blob = context['ti'].xcom_pull(task_ids=pull_task_id)
    blob_bucket_name, blob_name = blob

    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    gcs = CloudStorage(credentialsFromVault)

    gcs.delete_blob(blob_bucket_name, blob_name)
Example #6
def dfa_report_load(pull_task_id, dataset_table, schema=None, **context):
    blob = context['ti'].xcom_pull(task_ids=pull_task_id)
    blob_bucket_name, blob_name = blob

    file_uri = "gs://" + blob_bucket_name + "/" + blob_name

    print("Get Creds from Vault")

    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)

    # dataset_table is expected in "dataset.table" form
    dataset_id = dataset_table.split(".")[0]
    dest_table = dataset_table.split(".")[1]

    print("Auth BQ")
    bq = BigQuery(credentialsFromVault)

    print("Load to BQ")
    bq.load_from_gcs(dataset_id, file_uri, dest_table, mode='Append')
Example #7
def dfa_report_extract(report_id, **context):
    if context.get('execution_date'):
        # report on execution_date minus one day, shifted back 6 hours
        # because Airflow stores execution dates in UTC
        execution_date = context['execution_date']
        reporting_datetime = (execution_date -
                              timedelta(days=1, hours=6)).strftime('%Y-%m-%d')
        start_date = end_date = reporting_datetime

    else:
        start_date = context['start_date']
        end_date = context['end_date']

    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)

    local_filename = get_dfa_report(credentialsFromVault, report_id,
                                    start_date, end_date)
    print("Cleaning File")
    clean_dcm_file(local_filename)

    print("Auth GCS")
    gcs = CloudStorage(credentialsFromVault)
    folder = "brand_reporting/"

    if context.get('execution_date'):
        destination_blob_name = folder + reporting_datetime + "_" + local_filename
    else:
        destination_blob_name = folder + end_date + "_" + local_filename

    print("Upload File")

    gcs.upload_blob(bucket_name,
                    destination_blob_name,
                    local_filename,
                    mode='filename')

    stored_blob = gcs.get_blob(bucket_name, destination_blob_name)

    print("Clean Up Local")
    os.remove(local_filename)

    return (stored_blob.bucket.name, stored_blob.name)
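
dfa_report_extract returns a (bucket, blob_name) tuple, and both dfa_report_load and the clean_up from Example #5 pull that same tuple from XCom by the extract task's id. A hypothetical three-task wiring (task ids, report id and dataset_table are illustrative; assumes the same DAG context as the earlier sketches):

# Hypothetical DCM/DFA pipeline wiring (not from the source).
from airflow.operators.python_operator import PythonOperator

extract_dfa = PythonOperator(
    task_id="dfa_report_extract",
    python_callable=dfa_report_extract,
    op_kwargs={"report_id": 12345678},  # illustrative report id
    provide_context=True,
)

load_dfa = PythonOperator(
    task_id="dfa_report_load",
    python_callable=dfa_report_load,
    op_kwargs={"pull_task_id": "dfa_report_extract",
               "dataset_table": "brand_reporting.dcm_daily"},  # illustrative
    provide_context=True,
)

cleanup_dfa = PythonOperator(
    task_id="dfa_clean_up",
    python_callable=clean_up,            # the (pull_task_id, **context) variant
    op_kwargs={"pull_task_id": "dfa_report_extract"},
    provide_context=True,
)

extract_dfa >> load_dfa >> cleanup_dfa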
Example #8
def load_bq(tile_id, file_uri, table, **context):
    yesterday = context['yesterday_ds_nodash']

    schema = moat_schema_dict.get(tile_id)
    if schema:
        logging.info("Schema Found")

    credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
        service_account_email)
    bq = BigQuery(credentialsFromVault)

    logging.info("Build BQ Job")
    resp = bq.load_from_gcs("rtf_brand_reporting",
                            file_uri,
                            "{}_{}".format(table, yesterday),
                            schema=schema,
                            extension='json')

    logging.info("START JOB: {}".format(resp.job_id))

    resp.result()  # blocks until the load job finishes
    print("JOB COMPLETE: {}".format(resp.job_id))