def bq_upload(context, filepath, raw_filepath=None, partitions=None):
    """Upload treated (and optionally raw) data, then load it into BigQuery.

    Args:
        context: Dagster context; ``dataset_id`` and ``table_id`` are read
            from the ``basedosdados_config`` resource.
        filepath: Path to the treated file to load into the table.
        raw_filepath: Optional path to the raw source file; when given it is
            uploaded to storage in ``raw`` mode before the table load.
        partitions: Optional partition path segment (e.g.
            ``"data=2021-01-01/"``). When set, the table is created/appended
            from the parent directory that contains the partition folders.

    Side effects:
        Uploads files to GCS, creates/appends the BigQuery table, and
        deletes the local files afterwards.
    """
    table_id = context.resources.basedosdados_config["table_id"]
    dataset_id = context.resources.basedosdados_config["dataset_id"]
    context.log.info(f"""
    Received inputs:
    raw_filepath = {raw_filepath}, type = {type(raw_filepath)}
    treated_filepath = {filepath}, type = {type(filepath)}
    dataset_id = {dataset_id}, type = {type(dataset_id)}
    table_id = {table_id}, type = {type(table_id)}
    partitions = {partitions}, type = {type(partitions)}
    """)

    # Upload raw file to storage (mode="raw")
    if raw_filepath:
        st = Storage(table_id=table_id, dataset_id=dataset_id)
        # FIX: the original message repeated the bucket name twice
        # ("to bucket X at X/<dataset>/<table>"); log the destination once.
        context.log.info(
            f"Uploading raw file: {raw_filepath} to bucket "
            f"{st.bucket_name} at {dataset_id}/{table_id}"
        )
        st.upload(path=raw_filepath, partitions=partitions, mode="raw", if_exists="replace")

    # Create and publish the table if it does not exist; append otherwise.
    if partitions:
        # If the table is partitioned, load from the parent directory in
        # which the partition folders are stored.
        # NOTE(review): assumes ``partitions`` is a string that occurs in
        # ``filepath`` — confirm callers never pass a list here.
        tb_dir = filepath.split(partitions)[0]
        create_or_append_table(context, dataset_id, table_id, tb_dir)
    else:
        create_or_append_table(context, dataset_id, table_id, filepath)

    # Delete local files now that uploads and table load have completed.
    context.log.info(f"Deleting local files: {raw_filepath}, {filepath}")
    cleanup_local(filepath, raw_filepath)
def upload(context, filename):
    """Upload a local file to GCS staging for the configured dataset/table.

    Args:
        context: Dagster context; ``dataset_id`` and ``table_id`` are read
            from the ``basedosdados_config`` resource.
        filename: Local path of the file to upload.

    Returns:
        The ``filename`` argument unchanged, so downstream steps can keep
        consuming the same path.
    """
    dataset_id = context.resources.basedosdados_config["dataset_id"]
    table_id = context.resources.basedosdados_config["table_id"]
    st = Storage(dataset_id, table_id)
    # FIX: the message logged a literal "(unknown)" placeholder instead of
    # the file being uploaded, and was missing a space after "at:".
    context.log.info(
        f"Uploading {filename} to GCS at: {st.bucket_name}/staging/{dataset_id}/{table_id}",
    )
    st.upload(path=filename, mode="staging", if_exists="replace")
    return filename