def main():
    """Convert one variable of a single netCDF file to a GeoTIFF and upload it.

    Steps, in order:

    1. Call ``etl()``.
    2. Open ``INPUT_FILE_NAME`` under ``ec2_input_path`` read-only and print
       the attributes, dimensions and variables that ``aqueduct3.ncdump``
       reports.
    3. Build a global georeference sized from the lengths of the ``lat`` and
       ``lon`` variables.
    4. Read ``EXPORT_VARIABLE``, flip it top-to-bottom and write it to
       ``output_file_path`` as an Int32 GeoTIFF with nodata value -9999.
    5. Recursively copy ``ec2_output_path`` to ``s3_output_path`` with the
       AWS CLI.

    Relies on names that are not defined in this function (``etl``,
    ``ec2_input_path``, ``INPUT_FILE_NAME``, ``EXPORT_VARIABLE``,
    ``output_file_path``, ``ec2_output_path``, ``s3_output_path``) and on an
    active IPython shell for ``get_ipython()``.

    NOTE(review): the source formatting was collapsed onto one line; the
    statements after ``etl()`` are assumed to belong to this function --
    confirm against the original notebook.
    """
    etl()

    input_file_path = "{}/{}".format(ec2_input_path, INPUT_FILE_NAME)

    # Use the dataset as a context manager so the file handle is released
    # even if ncdump or a variable lookup raises; previously it was never
    # closed.
    with netCDF4.Dataset(input_file_path, 'r') as nc_fid:
        nc_attrs, nc_dims, nc_vars = aqueduct3.ncdump(nc_fid)
        print(nc_attrs, nc_dims, nc_vars)

        y_dimension = nc_fid.variables["lat"].shape[0]
        x_dimension = nc_fid.variables["lon"].shape[0]

        # Slicing copies the values into memory, so Z remains usable after
        # the dataset has been closed.
        Z = nc_fid.variables[EXPORT_VARIABLE][:, :]

    default_geotransform, default_geoprojection = (
        aqueduct3.get_global_georeference(
            np.ones([y_dimension, x_dimension])))

    # Reverse the row order before writing; presumably the netCDF stores
    # latitude south-to-north while the GeoTIFF is written north-up --
    # TODO confirm.
    Z = np.flipud(Z)

    aqueduct3.write_geotiff(output_file_path,
                            default_geotransform,
                            default_geoprojection,
                            Z,
                            nodata_value=-9999,
                            datatype=gdal.GDT_Int32)

    # The {name} placeholders are expanded by IPython, not by Python string
    # formatting, so the literal must stay as-is.
    get_ipython().system(
        'aws s3 cp {ec2_output_path} {s3_output_path} --recursive')
# In[4]:

# Recreate the local output directory so this run starts from an empty folder.
# The {name} placeholders are expanded by IPython when the command is run.
get_ipython().system('rm -r {ec2_output_path}')
get_ipython().system('mkdir -p {ec2_output_path}')


# In[5]:

""" This cell loops over the images in a netCDF. There are a couple of \
PCRGlobWB specific properties so be careful when using with other netCDFs. \
PCRGLOBWB specific properties include datatype (float32), nodata value, time \
format, minmax value etc. """

# One georeference, sized for the 5 arc-minute grid constants, is shared by
# every file converted below.
default_geotransform, default_geoprojection = (
    aqueduct3.get_global_georeference(
        np.ones([Y_DIMENSION_5MIN, X_DIMENSION_5MIN])))

# Walk the whole input tree and convert every netCDF file found in it.
for root, dirs, file_names in os.walk(ec2_input_path):
    for file_name in file_names:
        # Skip anything that is not a netCDF file.
        if not file_name.endswith((".nc4", ".nc")):
            continue

        print(file_name)
        input_path = os.path.join(root, file_name)
        output_path = aqueduct3.netCDF4_to_geotiff(
            file_name,
            input_path,
            ec2_output_path,
            default_geotransform,
            default_geoprojection,
        )


# In[ ]:

# Report how many entries the conversion left in the output directory.
files = os.listdir(ec2_output_path)
print("Number of files: " + str(len(files)))