def process_file(rap_file):
    """Ingest every configured RAP variable from *rap_file* into the raster store.

    For each entry in the module-level ``variables`` mapping, the matching GRIB
    band is located, the reference time is read from band metadata, and the
    band is handed to ``base_ingestor.ingest``.  Variables whose band cannot
    be found are logged and skipped rather than aborting the whole file.
    """
    provider_name = "RAP"
    srid = RAP_Spatial_Reference.epsg
    logger.info("Ingesting file %s", rap_file)
    for variable in variables:
        logger.info("Processing variable %s", variable)
        band_num = find_band_num(rap_file, filterr=variables[variable])
        if band_num is None:
            # A missing band is not fatal for the rest of the file.
            logger.error("Could not find band for %s", variable)
            continue
        # Renamed from ``vars`` to avoid shadowing the builtin.
        meta_keys = ["GRIB_REF_TIME", "GRIB_VALID_TIME"]
        datevalues = get_band_metadata(rap_file, band_num, meta_keys)
        startdate_utc_str = datevalues["GRIB_REF_TIME"].split()[0]
        start_date = datetime.utcfromtimestamp(float(startdate_utc_str))
        # GRIB_VALID_TIME was found unreliable here; RAP granules are hourly.
        end_date = start_date + timedelta(hours=1)
        block_size = (10, 10)
        ras = GDALRaster(rap_file, srid)
        ras.set_band_num(band_num)
        if variable == "RAP_REFL":
            # Reflectivity at or below -9 dBZ is treated as nodata.
            ras.nodata_range = [-999, -9]
        level = int(variables[variable]["GRIB_SHORT_NAME"].split("-")[0])
        granule_name = "%s_%s %s_%d" % (
            provider_name, variable, start_date.strftime("%Y%m%d %H:%M"), level)
        table_name = "%s_%s_%s_%d" % (
            provider_name, variable, start_date.strftime("%Y%m%d%H%M"), level)
        bbox = proj_helper.get_bbox(srid)
        # Fix: pass the sanitized table_name (no spaces/colons); it was being
        # computed but the display granule_name was passed as the table name.
        base_ingestor.ingest(ras=ras, provider_name=provider_name,
                             variable_name=variable, granule_name=granule_name,
                             table_name=table_name, srid=srid, level=level,
                             block_size=block_size, dynamic=False,
                             start_time=start_date, end_time=end_date,
                             subset_bbox=bbox, overwrite=True, threshold=None)
def ingest_gtopo_file(fmt):
    """Ingest the GTOPO30 elevation file as a raster (``fmt == 'ras'``) or as a
    vector layer (``fmt == 'vec'``).

    Relies on module-scope configuration (``provider_name``, ``variable_name``,
    ``srid``, ``block_size``, ``dtime``) being set by the surrounding script.
    """
    df = config.datafiles["GTOPO30_ELEV"]
    if isinstance(df["wildcard"], list):
        files = []
        for wc in df["wildcard"]:
            # Fix: pass each wildcard, not the whole list on every iteration
            # (the old code duplicated the full file set once per wildcard).
            files += base_ingestor.get_ingest_files(df["folder"], wc)
    else:
        files = base_ingestor.get_ingest_files(df["folder"], df["wildcard"])
    gtopo_file = files[0]
    ras = GDALRaster(gtopo_file, srid)
    ras.nodata_value = -9999
    bbox = proj_helper.get_bbox(srid)
    if fmt == 'ras':
        granule_name = "GTOPO30Elev_ras"
        level = 0
        base_ingestor.ingest(ras=ras, provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name, table_name=granule_name,
                             srid=srid, level=level, block_size=block_size,
                             dynamic=False, start_time=dtime,
                             end_time=datetime.max, subset_bbox=bbox,
                             overwrite=True)
        # Fix: valid PostgreSQL is "DROP INDEX IF EXISTS ...", not
        # "drop if exists index ..." (the old statement was a syntax error).
        pgdb_helper.submit("""
            drop index if exists rastertile_geom_gist_idx;
            create index rastertile_geom_gist_idx on rastertile using gist(st_convexhull(rast));
            """)
    if fmt == 'vec':
        granule_name = "GTOPO30Elev_vec"
        level = 1
        base_ingestor.ingest_vector(ras=ras, provider_name=provider_name,
                                    variable_name=variable_name,
                                    granule_name=granule_name,
                                    table_name=granule_name, srid=srid,
                                    level=level, block_size=block_size,
                                    start_time=dtime, end_time=datetime.max,
                                    subset_bbox=bbox, overwrite=True)
def ingest_gtopo_file():
    """Ingest the GTOPO30 elevation raster once per configured block size,
    storing each block size as its own level in the raster store."""
    provider_name = "GTOPO30"
    variable_name = "ELEV"
    df = config.datafiles["GTOPO30_ELEV"]
    if isinstance(df["wildcard"], list):
        files = []
        for wc in df["wildcard"]:
            # Fix: use the loop's wildcard, not the whole list each pass
            # (the old code duplicated the full file set once per wildcard).
            files += base_ingestor.get_ingest_files(df["folder"], wc)
    else:
        files = base_ingestor.get_ingest_files(df["folder"], df["wildcard"])
    gtopo_file = files[0]
    srid = 4326
    band_num = 1  # NOTE(review): unused below — GDALRaster presumably defaults to band 1; confirm
    # GTOPO30 is static; use a fixed epoch start and datetime.max as the range.
    dtime = datetime(year=1979, month=1, day=1, hour=0, minute=0, second=0)
    for block_size in block_sizes:
        level = block_size[0]  # level keyed by tile width
        granule_name = "GTOPO30Elev_%d" % level
        ras = GDALRaster(gtopo_file, srid)
        ras.nodata_value = -9999
        bbox = proj_helper.get_bbox(srid)
        base_ingestor.ingest(ras=ras, provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name, table_name=granule_name,
                             srid=srid, level=level, block_size=block_size,
                             dynamic=False, start_time=dtime,
                             end_time=datetime.max, subset_bbox=bbox,
                             overwrite=True)
def save_raster(lats, lons, t_start, t_end):
    """Rasterize point occurrences (lat/lon) into a per-cell count raster and
    ingest it as the MRMS CI_COUNT granule for the [t_start, t_end) window.

    Depends on module-scope ``size``, ``ul``, ``scale``, ``dtime`` and ``bbox``.
    """
    xs, ys = proj_helper.latlon2xy1(lats, lons, RAP_Spatial_Reference.proj4)
    # Each point contributes a weight of 1; stat="count" accumulates per cell.
    weights = [1] * len(xs)
    array_raster = ArrayRaster(ds_name="", data_array=None, size=size, ul=ul,
                               scale=scale, skew=(0, 0),
                               srid=RAP_Spatial_Reference.epsg,
                               gdal_datatype=gdalconst.GDT_Int16,
                               nodata_value=999)
    array_raster.set_data_with_xy(x=xs, y=ys, data=weights, stat="count")
    level = 0
    block_size = 50, 50
    variable_name = "CI_COUNT"
    provider_name = "MRMS"
    # NOTE(review): "%Y%d%m%H%M" puts day before month — confirm intended.
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 dtime.strftime("%Y%d%m%H%M"))
    base_ingestor.ingest(ras=array_raster, provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name, table_name=granule_name,
                         srid=RAP_Spatial_Reference.epsg, level=level,
                         block_size=block_size, dynamic=False,
                         start_time=t_start, end_time=t_end,
                         subset_bbox=bbox, overwrite=True)
    logger.info("Inserted %s" % granule_name)
def process_mrms_file(mrms_file):
    """Ingest an MRMS reflectivity file (optionally gzip-compressed NetCDF).

    A ``.gz`` input is decompressed into the working directory (reusing an
    existing copy if present); the time axis is read from the NetCDF, and the
    fixed-height reflectivity band is ingested once per configured block size.
    Temporary decompressed copies are removed afterwards.
    """
    provider_name = "MRMS"
    variable_name = "REFL"
    ext_parts = os.path.splitext(mrms_file)
    ext = ext_parts[1]
    remove_after_process = False
    if ext == ".gz":
        nc_file_name = ext_parts[0]
        nc_file_copy = os.path.join("./", os.path.basename(nc_file_name))
        if os.path.exists(nc_file_copy):
            mrms_file = nc_file_copy
        else:
            # Fix: context managers guarantee both handles are closed even if
            # decompression fails part-way through (the old code leaked them).
            with gzip.open(mrms_file, 'rb') as gz_file:
                with open(nc_file_copy, 'wb') as nc_file:
                    nc_file.write(gz_file.read())
            mrms_file = nc_file_copy
            remove_after_process = True
    # Renamed from ``vars`` to avoid shadowing the builtin.
    nc_vars = nc_get_1d_vars_as_list(mrms_file, ["Ht", "time"])
    heights = nc_vars["Ht"]  # currently unused; kept for reference
    times = nc_vars["time"]
    srid = 4326
    dtime = datetime.utcfromtimestamp(times[0])
    bbox = proj_helper.get_bbox(srid)
    start_time = dtime
    end_time = dtime + timedelta(minutes=2)
    for block_size in block_sizes:
        level = block_size[0]  # put the various tile sizes in various levels
        granule_name = "%s_%s %s_%d" % (provider_name, variable_name,
                                        dtime.strftime("%Y%m%d %H:%M"), level)
        table_name = "%s_%s_%s_%d" % (provider_name, variable_name,
                                      dtime.strftime("%Y%m%d%H%M"), level)
        bottom_up_data = True
        ras = GDALRaster(mrms_file, srid, bottom_up_data)
        height_index = 14  # hard-coded height slice; NetCDF bands are 1-based
        ras.set_band_num(height_index + 1)
        # Explicitly override the nodata value since the netcdf file's
        # metadata is not correct.
        ras.nodata_value = -999
        ras.nodata_range = (-999, 0)
        # Fix: pass the sanitized table_name (no spaces/colons); it was being
        # computed but granule_name was passed as the table name.
        base_ingestor.ingest(ras=ras, provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name, table_name=table_name,
                             srid=srid, level=level, block_size=block_size,
                             dynamic=False, subset_bbox=bbox,
                             start_time=start_time, end_time=end_time,
                             overwrite=True, threshold=34)
    if remove_after_process:
        os.remove(mrms_file)
# --- Fragment: tail of a MODIS land-cover ingest loop (enclosing function/loop
# header is not visible in this chunk, and the second loop is cut off below).
# Ingest the current science dataset (sds) for the previously parsed date range.
granule_name = "%s_%s %s_%d" % (provider_name, variable_name, start_date.strftime("%Y%m%d"), level)
# NOTE(review): table_name is computed but granule_name is passed as the
# table_name argument below — confirm which is intended.
table_name = "%s_%s_%s_%d" % (provider_name, variable_name, start_date.strftime("%Y%m%d"), level)
ras = GDALRaster(sds, srid)
#ras.nodata_range = [0.5, 256] #only 0 (water)
bbox = proj_helper.get_bbox(srid)
base_ingestor.ingest(ras=ras, provider_name=provider_name, variable_name=variable_name, granule_name=granule_name, table_name=granule_name, srid=srid, level=level, block_size=block_size, dynamic=False, subset_bbox=bbox, start_time=start_date, end_time=end_date, overwrite=True)
#also ingest same data as land water mask
for hdf_file in files:
    sds = get_sds(hdf_file, "Land_Cover_Type_2")
    provider_name = "MODIS"
    variable_name = "WATERBODY"
    # Granule date range comes from the HDF global metadata attributes.
    vars = ["RANGEBEGINNINGDATE", "RANGEENDINGDATE"]
    datevalues = get_metadata(hdf_file, vars)
    # (loop body continues beyond this chunk)
def process_file(ahps_file):
    """Ingest a daily AHPS precipitation NetCDF file.

    The file carries no projection metadata, so the HRAP grid geometry
    (upper-left origin, pixel scale) is reconstructed from the corner lat/lon
    variables and set explicitly on the raster before ingesting.

    Relies on module-scope ``provider_name`` and ``variable_name``.
    """
    logger.info("Processing %s", ahps_file)
    # Renamed from ``vars`` to avoid shadowing the builtin.
    nc_vars = nc_get_1d_vars_as_list(
        ahps_file, ["timeofdata", "lat", "lon", "true_lat", "true_lon"])
    time_chars = nc_vars["timeofdata"]
    lat = nc_vars["lat"]
    lon = nc_vars["lon"]
    # Corner order in the file: bottom-left, bottom-right, top-right, top-left.
    # Longitudes are stored positive-west, hence the sign flip.
    bottom_left = lat[0], -1 * lon[0]
    bottom_right = lat[1], -1 * lon[1]
    top_right = lat[2], -1 * lon[2]
    top_left = lat[3], -1 * lon[3]
    bottom_left_xy = proj_helper.latlon2xy(bottom_left[0], bottom_left[1], SRID_HRAP)
    bottom_right_xy = proj_helper.latlon2xy(bottom_right[0], bottom_right[1], SRID_HRAP)
    top_left_xy = proj_helper.latlon2xy(top_left[0], top_left[1], SRID_HRAP)
    top_right_xy = proj_helper.latlon2xy(top_right[0], top_right[1], SRID_HRAP)
    time_str = "".join(time_chars)
    dtime = datetime.strptime(time_str, "%Y%m%d%HZ")
    logger.info("write to postgis - %s", ahps_file)
    block_size = (50, 50)
    level = 0
    ras = GDALRaster(ahps_file, SRID_HRAP)
    ras.set_band_num(1)
    ras.nodata_value = -1
    ras.nodata_range = (-1, 1)
    # Pixel scale from the top edge (x) and right edge (y).  The bottom/left
    # edge estimates differ only by grid distortion and were unused.
    scale_x = (top_right_xy[0] - top_left_xy[0]) / ras.size[0]
    scale_y = (bottom_right_xy[1] - top_right_xy[1]) / ras.size[1]
    ul_x = top_left_xy[0]
    ul_y = top_left_xy[1]
    # Explicitly set projection params since the netcdf file does not have them.
    ras.scale = (scale_x, scale_y)
    ras.ul = (ul_x, ul_y)
    ras.skew = (0, 0)
    ras.geo_bounds = [
        ras.ul[0], ras.ul[0] + ras.size[0] * ras.scale[0],
        ras.ul[1], ras.ul[1] + ras.size[1] * ras.scale[1]
    ]
    granule_name = "%s_%s %s_%d" % (provider_name, variable_name,
                                    dtime.strftime("%Y%m%d %H:%M"), level)
    table_name = "%s_%s_%s_%d" % (provider_name, variable_name,
                                  dtime.strftime("%Y%m%d%H%M"), level)
    bbox = proj_helper.get_bbox(SRID_HRAP)
    start_time = dtime
    end_time = dtime + timedelta(days=1)
    # Fix: pass the sanitized table_name (no spaces/colons); it was being
    # computed but granule_name was passed as the table name.
    base_ingestor.ingest(ras=ras, provider_name=provider_name,
                         variable_name=variable_name, granule_name=granule_name,
                         table_name=table_name, srid=SRID_HRAP, level=level,
                         block_size=block_size, dynamic=False, subset_bbox=bbox,
                         start_time=start_time, end_time=end_time,
                         overwrite=True)
def process_file(tf):
    """Ingest cumulus / towering-cumulus masks from one GOES cloud-type file.

    ``tf`` is a dict with keys ``file`` (path, possibly gzipped), ``nt`` (the
    previous timestep, used as start_time) and ``dt`` (this file's timestep).
    Two 0/1 masks are built over the module-scope ``indexes`` pixel subset
    (cloud type 2 = cumulus, type 4 = towering cumulus) and each is ingested
    as its own variable using the module-scope ``array_raster``.
    """
    fname = tf["file"]
    prev_time = tf["nt"]
    dtime = tf["dt"]
    logger.info("Processing file %s ", fname)
    ext_parts = os.path.splitext(fname)
    ext = ext_parts[1]
    remove_after_process = False
    if ext == ".gz":
        nc_file_name = ext_parts[0]
        nc_file_copy = os.path.join(os.path.dirname(fname), nc_file_name)
        # Fix: context managers close both handles even when decompression
        # fails part-way through (the old code leaked them on error).
        with gzip.open(fname, 'rb') as gz_file:
            with open(nc_file_copy, 'wb') as nc_file:
                nc_file.write(gz_file.read())
        data_file = nc_file_copy
        remove_after_process = True
    else:
        data_file = fname
    provider_name = "GOES"
    cloud_mask = bin_reader(data_file, typechar='f',
                            chunk_size=CHUNK_SIZE, recycle=False)
    cum_vals = []
    tcum_vals = []
    num_chunk = 0
    while True:
        try:
            # Fix: builtin next() works on both py2 and py3 iterators,
            # unlike the py2-only .next() method.
            val_chunk = next(cloud_mask)
        except StopIteration:
            break
        for k in range(len(val_chunk)):
            if (num_chunk * CHUNK_SIZE + k) in indexes:
                # Special masking for GOES cloud-mask data: only types 2 & 4.
                cum_vals.append(1 if val_chunk[k] == 2 else 0)    # cumulus
                tcum_vals.append(1 if val_chunk[k] == 4 else 0)   # towering cumulus
        num_chunk += 1
    level = 0
    block_size = 50, 50
    variable_name = "CUM_CLOUD"
    # NOTE(review): "%Y%d%m%H%M" puts day before month — confirm intended.
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 dtime.strftime("%Y%d%m%H%M"))
    array_raster.set_data_with_xy(x=all_x, y=all_y, data=cum_vals)
    array_raster.dsname = granule_name
    base_ingestor.ingest(ras=array_raster, provider_name=provider_name,
                         variable_name=variable_name, granule_name=granule_name,
                         table_name=granule_name,
                         srid=ALBERS_Spatial_Reference.epsg, level=level,
                         block_size=block_size, dynamic=False,
                         start_time=prev_time, end_time=dtime,
                         subset_bbox=bbox, overwrite=True)
    variable_name = "TCUM_CLOUD"
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 dtime.strftime("%Y%d%m%H%M"))
    array_raster.set_data_with_xy(x=all_x, y=all_y, data=tcum_vals)
    array_raster.dsname = granule_name
    base_ingestor.ingest(ras=array_raster, provider_name=provider_name,
                         variable_name=variable_name, granule_name=granule_name,
                         table_name=granule_name,
                         srid=ALBERS_Spatial_Reference.epsg, level=level,
                         block_size=block_size, dynamic=False,
                         start_time=prev_time, end_time=dtime,
                         subset_bbox=bbox, overwrite=True)
    if remove_after_process:
        os.remove(data_file)