import logging
import pickle
import warnings
from pathlib import Path

import boto3
import pandas as pd
import xarray as xr

from erebos.adapters.goes import GOESFilename

# prep_first_file, add_variables_to_out, match_goes_file, make_combined_dataset,
# process_site, and __version__ are assumed to be provided elsewhere in the
# erebos package.


def make_out_path(mcmip_file, out_dir):
    """Build the output path for a combined file, named after the midpoint of
    the scan covered by the MCMIP file and nested as YYYY/MM/DD under out_dir."""
    fn = GOESFilename.from_path(mcmip_file)
    ftime = fn.start + (fn.end - fn.start) / 2
    dir_ = Path(out_dir) / ftime.strftime("%Y/%m/%d")
    dir_.mkdir(parents=True, exist_ok=True)
    return dir_ / ftime.strftime(
        f"erebos_{fn.product}{fn.sector}_{fn.satellite}_%Y%m%dT%H%M%SZ.nc"
    )

def match_calipso_goes_times(calipso_dir, goes_dir, goes_glob):
    """Pair each CALIPSO granule with the GOES file whose start time rounds to
    the same 5 minute mark; the GOES entry is None when no such file exists."""
    out = []
    goes_files = [GOESFilename.from_path(f) for f in goes_dir.glob(goes_glob)]
    goes_files = {f.start.round("5min"): f.filename for f in goes_files}
    for cf in calipso_dir.glob("*.hdf"):
        with xr.open_dataset(cf, engine="pynio") as cds:
            ctime = pd.Timestamp(cds.erebos.mean_time).round("5min")
            if ctime in goes_files:
                out.append((cf, goes_files[ctime]))
            else:
                out.append((cf, None))
    return out

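# Hedged usage sketch (not part of the original module): summarize how many
# CALIPSO granules found a GOES file at the same rounded 5 minute mark. The
# helper name is hypothetical; match_calipso_goes_times returns
# (calipso_path, goes_path_or_None) pairs as defined above.
def report_unmatched_calipso(calipso_dir, goes_dir, goes_glob):
    pairs = match_calipso_goes_times(calipso_dir, goes_dir, goes_glob)
    unmatched = [cf for cf, gf in pairs if gf is None]
    logging.info(
        "%d of %d CALIPSO files had no GOES match", len(unmatched), len(pairs)
    )
    return unmatched
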
def generate_single_chan_prefixes(mcmip_file, bucket):
    """From a CMIP or MCMIP filename, yield (channel, s3_key) for the 16
    individual-channel CMIP files made at the same time in the bucket."""
    fn = GOESFilename.from_path(mcmip_file)
    s3 = boto3.client("s3")
    for chan in range(1, 17):
        prefix = fn.to_s3_prefix(channel=chan, product="CMIP")
        resp = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        if resp["KeyCount"] == 0:
            raise KeyError(f"No keys with prefix {prefix}")
        key = resp["Contents"][0]["Key"]
        yield chan, key

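# Hedged usage sketch (not part of the original module): download each of the
# 16 single-channel CMIP files that generate_single_chan_prefixes yields. The
# helper name and local layout are illustrative; the bucket default matches the
# "noaa-goes16" bucket used elsewhere in this repo.
def download_single_channels(mcmip_file, out_dir, bucket="noaa-goes16"):
    s3 = boto3.client("s3")
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    for chan, key in generate_single_chan_prefixes(mcmip_file, bucket):
        local_path = out_dir / Path(key).name
        if not local_path.exists():
            s3.download_file(bucket, key, str(local_path))
        yield chan, local_path
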
def combine_goes_files_at_time(base_path, first_ds):
    """Add the remaining CMIP channels, all L1b Rad channels, and the L2 cloud
    products (ACHA, ACM, ACTP, COD) found in sibling product directories to the
    prepared first dataset."""
    var_map = {"ACHA": "HT", "ACM": "BCM", "ACTP": "Phase", "COD": "COD"}
    gfile = GOESFilename.from_path(base_path)
    out = first_ds.copy()
    dir_ = base_path.parent / "../CMIP"
    for chan in range(2, 17):
        out = add_variables_to_out(out, dir_, gfile, "CMI", chan)
    dir_ = base_path.parent / "../Rad"
    for chan in range(1, 17):
        out = add_variables_to_out(
            out, dir_, gfile, "Rad", chan, processing_level="L1b", product="Rad"
        )
    for prod, var in var_map.items():
        dir_ = base_path.parent / ".." / prod
        out = add_variables_to_out(out, dir_, gfile, var, chan=0, product=prod)
    return out

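# Hedged sketch (not part of the original module): combine_goes_files_at_time
# expects product directories (CMIP, Rad, ACHA, ACM, ACTP, COD) to sit next to
# the directory containing base_path. This hypothetical helper only checks that
# layout before combining; the directory names mirror the code above.
def check_product_dirs(base_path):
    missing = []
    for prod in ("CMIP", "Rad", "ACHA", "ACM", "ACTP", "COD"):
        if not (base_path.parent / ".." / prod).resolve().is_dir():
            missing.append(prod)
    if missing:
        logging.warning("Missing product directories: %s", ", ".join(missing))
    return missing
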
def combine_calipso_goes_files(
    calipso_dir, goes_dir, save_dir, goes_glob, calipso_glob, limits=(0, None)
):
    calipso_files = list(calipso_dir.glob(calipso_glob))[slice(*limits)]
    goes_files = [
        GOESFilename(f, start=pd.Timestamp(f.name.split("_")[0], tz="UTC"))
        for f in goes_dir.glob(goes_glob)
    ]
    for cfile in calipso_files:
        logging.info("Processing %s", cfile)
        gfile = match_goes_file(cfile, goes_files)
        if gfile is None:
            logging.warning("No matching GOES file for %s", cfile)
            continue
        filename = save_dir / gfile.name
        if filename.exists():
            logging.info("File already exists at %s", filename)
            continue
        ds = make_combined_dataset(
            cfile,
            gfile,
            [
                "cloud_top_altitude",
                "cloud_thickness",
                "cloud_base_altitude",
                "cloud_layers",
                "solar_azimuth",
                "solar_zenith",
            ],
            ["cloud_type", "day_night_flag", "surface_elevation"],
        )
        logging.info("Saving file to %s", filename)
        ds.to_netcdf(filename, engine="netcdf4")
        ds.close()

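# Hedged usage sketch (not part of the original module; the save directory,
# goes_dir, and the 100-file limit are illustrative): build combined
# CALIPSO/GOES files for a subset of granules. The GOES files are matched by
# the timestamp prefix of the *_combined.nc names written by combine_goes_files
# below, and the CALIPSO glob matches the one used in the download script.
def example_combine_subset():
    combine_calipso_goes_files(
        Path("/storage/projects/goes_alg/calipso/west/1km_cloud"),
        Path("/storage/projects/goes_alg/goes_data/west/combined"),
        Path("/storage/projects/goes_alg/combined"),
        goes_glob="*combined.nc",
        calipso_glob="*D_Sub*.hdf",
        limits=(0, 100),
    )
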
def combine_goes_files(base_dir):
    cmip_c01_files = (base_dir / "CMIP").glob("*C01*")
    outdir = base_dir / "combined"
    outdir.mkdir(parents=True, exist_ok=True)
    for afile in cmip_c01_files:
        final_path = outdir / GOESFilename.from_path(afile).start.strftime(
            "%Y%m%dT%H%M%S_combined.nc"
        )
        if final_path.is_file():
            logging.info("Path exists at %s", final_path)
            continue
        orig = xr.open_dataset(afile, engine="netcdf4").load()
        ds = prep_first_file(orig).load()
        orig.close()
        out = combine_goes_files_at_time(afile, ds)
        ds.close()
        out.attrs["erebos_version"] = __version__
        logging.info("Saving file to %s", final_path)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            out.to_netcdf(final_path, engine="netcdf4")
        out.close()
    logging.info("Done")

if __name__ == "__main__":
    logging.basicConfig(format="%(asctime)s %(message)s", level="INFO")
    with open("height.pkl", "rb") as f:
        height_model = pickle.load(f)
    with open("cloud_mask.pkl", "rb") as f:
        mask_model = pickle.load(f)
    with open("cloud_type.pkl", "rb") as f:
        type_model = pickle.load(f)
    site_data = xr.open_dataset("/storage/projects/goes_alg/site_data.nc")
    goes_files = [
        GOESFilename.from_path(f)
        for f in Path("/storage/projects/goes_alg/goes_data/west/CMIP").glob(
            "*MCMIPC*.nc"
        )
    ]
    final_countdown = []
    for gfile in goes_files:
        if gfile.start.hour < 13:
            continue
        logging.info("Processing file from %s", gfile.start)
        with xr.open_dataset(gfile.filename) as goes_ds:
            tomerge = []
            for _, site in site_data.groupby("site"):
                tomerge.append(process_site(goes_ds, site))
            final_countdown.append(xr.merge(tomerge))
    output = xr.merge(final_countdown)

# Standalone prep script: download the GOES files corresponding to CALIPSO
# overpasses and generate CLASS search XML requests for the remaining files.
import logging
from pathlib import Path

from erebos import prep
from erebos.adapters.goes import GOESFilename


logging.basicConfig(format="%(asctime)s %(levelno)s %(message)s", level="INFO")

calipso_dir = Path("/storage/projects/goes_alg/calipso/west/1km_cloud/")
goes_dir = Path("/storage/projects/goes_alg/goes_data/west/CMIP/")
xml_dir = Path("/storage/projects/goes_alg/goes_data/west/xml/")

product_names = (
    [("ABI-L2-MCMIPC", None)]
    + [("ABI-L2-CMIPC", band) for band in range(1, 17)]
    + [("ABI-L1b-RadC", band) for band in range(1, 17)]
)

prep.download_corresponding_goes_files(
    calipso_dir,
    goes_dir,
    bucket_name="noaa-goes16",
    product_names_bands=product_names,
    checkpoint=True,
    cglob="*D_Sub*.hdf",
)

xml_dir.mkdir(parents=True, exist_ok=True)
for gfile in goes_dir.glob("*CMIPC*C01*.nc"):
    gcf = GOESFilename.from_path(gfile)
    prep.create_class_search_xml(gcf, xml_dir)

# Drop XML requests that have already been retrieved or are being processed.
for xml_file in xml_dir.glob("*.xml"):
    if (xml_file.parent / "retrieved" / xml_file.name).exists() or (
        xml_file.parent / "processing" / xml_file.name
    ).exists():
        xml_file.unlink()