df['datetime'] = datetime.datetime(2016, 1, 1)

# rearrange the column names
df = df[['country', 'datetime', 'death_rate_per_100000', 'estimated_lower_bound',
         'estimated_number_of_road_traffic_deaths_data', 'estimated_upper_bound']]

# some rows have spaces between the digits in the
# 'estimated_number_of_road_traffic_deaths_data' column; remove those spaces
df['estimated_number_of_road_traffic_deaths_data'] = df['estimated_number_of_road_traffic_deaths_data'].str.replace(' ', '')

# save processed dataset to csv
processed_data_file = os.path.join(data_dir, dataset_name + '_edit.csv')
df.to_csv(processed_data_file, index=False)

'''
Upload processed data to Carto
'''
logger.info('Uploading processed data to Carto.')
util_carto.upload_to_carto(processed_data_file, 'LINK')

'''
Upload original data and processed data to Amazon S3 storage
'''
# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Uploading original data to S3.')
# Copy the raw data into a zipped file to upload to S3
raw_data_dir = os.path.join(data_dir, dataset_name + '.zip')
with ZipFile(raw_data_dir, 'w') as zipped:
    zipped.write(raw_data_file, os.path.basename(raw_data_file))
# Upload raw data file to S3
uploaded = util_cloud.aws_upload(raw_data_dir, aws_bucket, s3_prefix + os.path.basename(raw_data_dir))
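'''
Illustrative sketch (not part of the script): the whitespace fix above relies on
pandas' vectorized string methods. The column name is taken from the script; the
sample values below are invented for demonstration.
'''
import pandas as pd

demo = pd.DataFrame({'estimated_number_of_road_traffic_deaths_data': ['1 234', '56 789']})
# strip the embedded spaces so the values parse cleanly as numbers
demo['estimated_number_of_road_traffic_deaths_data'] = demo['estimated_number_of_road_traffic_deaths_data'].str.replace(' ', '')
print(demo['estimated_number_of_road_traffic_deaths_data'].tolist())  # ['1234', '56789']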
# Copy the raw data into a zipped file to upload to S3
raw_data_dir = os.path.join(data_dir, dataset_name + '.zip')
with ZipFile(raw_data_dir, 'w') as zipped:
    zipped.write(raw_data_file, os.path.basename(raw_data_file))

logger.info('Uploading original data to S3.')
# Upload raw data file to S3
uploaded = util_cloud.aws_upload(raw_data_dir, aws_bucket, s3_prefix + os.path.basename(raw_data_dir))

# Copy the processed data into a zipped file to upload to S3
processed_data_dir = os.path.join(data_dir, dataset_name + '_edit.zip')
# Find all the necessary components of the shapefile
processed_data_files = glob.glob(os.path.join(data_dir, dataset_name + '_edit.*'))
with ZipFile(processed_data_dir, 'w') as zipped:
    for file in processed_data_files:
        zipped.write(file, os.path.basename(file))

logger.info('Uploading processed data to S3.')
# Upload processed data file to S3
uploaded = util_cloud.aws_upload(processed_data_dir, aws_bucket, s3_prefix + os.path.basename(processed_data_dir))

'''
Upload processed data to Carto
'''
logger.info('Uploading processed data to Carto.')
# upload the zipped shapefile to Carto
util_carto.upload_to_carto(processed_data_dir, 'LINK', tags=['ow'])
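'''
Illustrative sketch (an assumption, not the project's actual helper):
util_cloud.aws_upload is project-specific, but a minimal boto3 equivalent of the
calls above could look like this. The function name and return value are
invented; boto3's upload_file is the standard S3 upload call.
'''
import boto3

def aws_upload_sketch(local_path, bucket, key):
    # upload one local file to s3://bucket/key and return the key on success
    s3 = boto3.client('s3')
    s3.upload_file(local_path, bucket, key)
    return key

# usage mirroring the script:
# aws_upload_sketch(raw_data_dir, aws_bucket, s3_prefix + os.path.basename(raw_data_dir))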
out_filelist = glob.glob(out_str)
zipfile_list = []

# Create a zipfile for the shapefile components
logger.info('Zipping files for ' + processed_data_file)
out_zip_dir = processed_data_file[:-4] + '.zip'
with ZipFile(out_zip_dir, 'w') as zipped:
    for file in out_filelist:
        zipped.write(file, os.path.basename(file))
        zipfile_list.append(file)

# Upload zipfile to Carto
logger.info('Uploading processed data for ' + processed_data_file + ' to Carto.')
util_carto.upload_to_carto(out_zip_dir, 'LINK', tags=['ow'])

'''
Upload original data and processed data to Amazon S3 storage
'''
# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Zipping original data files.')
# Copy the raw data into a zipped file to upload to S3
raw_zip_dir = os.path.join(data_dir, dataset_name + '.zip')
with ZipFile(raw_zip_dir, 'w') as zipped:
    for file in raw_namelist:
        zipped.write(file, os.path.basename(file))
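'''
Illustrative note (not part of the script): the slice processed_data_file[:-4]
above assumes a three-character extension like '.csv' or '.shp'.
os.path.splitext builds the same zip path without that assumption; the example
path below is made up.
'''
import os

stem, ext = os.path.splitext('data/example_edit.shp')
out_zip_dir_alt = stem + '.zip'  # 'data/example_edit.zip', regardless of extension length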
imports_processed_data_file = os.path.join(data_dir, 'foo_066a_rw0_food_product_import_shares_edit.csv')
exports_processed_data_file = os.path.join(data_dir, 'foo_066b_rw0_food_product_export_shares_edit.csv')

# export both datasets to csv files
imports_df_edit.to_csv(imports_processed_data_file, index=False)
exports_df_edit.to_csv(exports_processed_data_file, index=False)

# collect both paths in a list so they can be uploaded in one loop
processed_data_file = [imports_processed_data_file, exports_processed_data_file]

'''
Upload processed data to Carto
'''
logger.info('Uploading processed data to Carto.')
for file in processed_data_file:
    util_carto.upload_to_carto(file, 'LINK', collision_strategy='overwrite')

'''
Upload original data and processed data to Amazon S3 storage
'''
# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Uploading original data to S3.')
# Copy the raw data files into a zipped file to upload to S3
raw_data_dir = os.path.join(data_dir, dataset_name + '.zip')
with ZipFile(raw_data_dir, 'w') as zipped:
    for file in raw_data_file:
        zipped.write(file, os.path.basename(file))
# Upload raw data file to S3
uploaded = util_cloud.aws_upload(raw_data_dir, aws_bucket, s3_prefix + os.path.basename(raw_data_dir))
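'''
Illustrative sketch (not part of the script): a quick sanity check that the zip
built above contains the expected files, using the standard library's
ZipFile.namelist(). raw_data_dir is the path assembled in the script.
'''
from zipfile import ZipFile

with ZipFile(raw_data_dir) as archive:
    print(archive.namelist())  # file names stored in the archive, for a visual check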