def make_out_path(mcmip_file, out_dir):
    # Name the output after the midpoint of the GOES scan window, filed
    # under an out_dir/YYYY/MM/DD tree that is created on demand.
    fn = GOESFilename.from_path(mcmip_file)
    ftime = fn.start + (fn.end - fn.start) / 2
    dir_ = Path(out_dir) / ftime.strftime("%Y/%m/%d")
    dir_.mkdir(parents=True, exist_ok=True)
    return dir_ / ftime.strftime(
        f"erebos_{fn.product}{fn.sector}_{fn.satellite}_%Y%m%dT%H%M%SZ.nc")
Example #2
def match_calipso_goes_times(calipso_dir, goes_dir, goes_glob):
    # Pair each CALIPSO granule with the GOES file whose scan start rounds
    # to the same 5 minute mark as the granule's mean time; granules with
    # no match are paired with None.
    out = []
    goes_files = [GOESFilename.from_path(f) for f in goes_dir.glob(goes_glob)]
    goes_files = {f.start.round("5min"): f.filename for f in goes_files}
    for cf in calipso_dir.glob("*.hdf"):
        with xr.open_dataset(cf, engine="pynio") as cds:
            ctime = pd.Timestamp(cds.erebos.mean_time).round("5min")
        if ctime in goes_files:
            out.append((cf, goes_files[ctime]))
        else:
            out.append((cf, None))
    return out
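
A usage sketch assuming the directory layout used at the bottom of this file; CALIPSO granules whose 5-minute-rounded mean time has no matching GOES scan start come back paired with None:

pairs = match_calipso_goes_times(
    Path("/storage/projects/goes_alg/calipso/west/1km_cloud"),
    Path("/storage/projects/goes_alg/goes_data/west/CMIP"),
    "*MCMIPC*.nc",
)
matched = [(cf, gf) for cf, gf in pairs if gf is not None]
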
Example #3

def generate_single_chan_prefixes(mcmip_file, bucket):
    """
    From a CMIP or MCMIP filename, find the S3 keys for the 16 individual
    channels produced at the same scan time in the given bucket.
    """
    fn = GOESFilename.from_path(mcmip_file)
    s3 = boto3.client("s3")

    for chan in range(1, 17):
        prefix = fn.to_s3_prefix(channel=chan, product="CMIP")
        resp = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        if resp["KeyCount"] == 0:
            raise KeyError(f"No keys with prefix {prefix}")
        key = resp["Contents"][0]["Key"]
        yield chan, key
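
For example, a sketch listing the per-channel keys against the public noaa-goes16 bucket used later in this file (the MCMIP filename here is hypothetical):

for chan, key in generate_single_chan_prefixes(
        "OR_ABI-L2-MCMIPC-M6_G16_s20200011800000_e20200011805000_c20200011805300.nc",
        "noaa-goes16"):
    print(f"C{chan:02d}: {key}")
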
Example #4
def combine_goes_files_at_time(base_path, first_ds):
    # Each L2 product directory contributes one variable: cloud top height
    # (HT), clear sky mask (BCM), cloud top phase (Phase), and cloud
    # optical depth (COD).
    var_map = {"ACHA": "HT", "ACM": "BCM", "ACTP": "Phase", "COD": "COD"}
    gfile = GOESFilename.from_path(base_path)
    out = first_ds.copy()
    # Product directories are siblings of base_path's parent directory.
    dir_ = base_path.parent / "../CMIP"
    for chan in range(2, 17):
        out = add_variables_to_out(out, dir_, gfile, "CMI", chan)
    dir_ = base_path.parent / "../Rad"
    for chan in range(1, 17):
        out = add_variables_to_out(out,
                                   dir_,
                                   gfile,
                                   "Rad",
                                   chan,
                                   processing_level="L1b",
                                   product="Rad")
    for prod, var in var_map.items():
        dir_ = base_path.parent / ".." / prod
        out = add_variables_to_out(out, dir_, gfile, var, chan=0, product=prod)
    return out
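
The relative paths above imply a per-product directory layout, inferred from this function and from combine_goes_files below rather than documented anywhere:

# <root>/CMIP/  single-channel cloud and moisture imagery (CMI, channels 2-16;
#               the channel 1 file supplies first_ds)
# <root>/Rad/   L1b radiances (Rad, channels 1-16)
# <root>/ACHA/  cloud top height (HT)
# <root>/ACM/   clear sky mask (BCM)
# <root>/ACTP/  cloud top phase (Phase)
# <root>/COD/   cloud optical depth (COD)
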
Example #5
def combine_calipso_goes_files(calipso_dir,
                               goes_dir,
                               save_dir,
                               goes_glob,
                               calipso_glob,
                               limits=(0, None)):
    calipso_files = list(calipso_dir.glob(calipso_glob))[slice(*limits)]
    # GOES filenames here are expected to begin with a parseable timestamp,
    # e.g. the *_combined.nc files produced by combine_goes_files below.
    goes_files = [
        GOESFilename(f, start=pd.Timestamp(f.name.split("_")[0], tz="UTC"))
        for f in goes_dir.glob(goes_glob)
    ]
    for cfile in calipso_files:
        logging.info("Processing %s", cfile)
        gfile = match_goes_file(cfile, goes_files)
        if gfile is None:
            logging.warning("No matching GOES file for %s", cfile)
            continue

        filename = save_dir / gfile.name

        if filename.exists():
            logging.info("File already exists at %s", filename)
            continue

        ds = make_combined_dataset(
            cfile,
            gfile,
            [
                "cloud_top_altitude",
                "cloud_thickness",
                "cloud_base_altitude",
                "cloud_layers",
                "solar_azimuth",
                "solar_zenith",
            ],
            ["cloud_type", "day_night_flag", "surface_elevation"],
        )
        logging.info("Saving file to %s", filename)
        ds.to_netcdf(filename, engine="netcdf4")
        ds.close()
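
A plausible invocation, assuming the *_combined.nc files from combine_goes_files below (their names begin with the timestamp this function parses); the save directory here is hypothetical:

combine_calipso_goes_files(
    Path("/storage/projects/goes_alg/calipso/west/1km_cloud"),
    Path("/storage/projects/goes_alg/goes_data/west/combined"),
    Path("/storage/projects/goes_alg/combined"),  # hypothetical output dir
    goes_glob="*_combined.nc",
    calipso_glob="*D_Sub*.hdf",
    limits=(0, 100),  # only the first 100 CALIPSO granules
)
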
Example #6
def combine_goes_files(base_dir):
    cmip_c01_files = (base_dir / "CMIP").glob("*C01*")
    outdir = base_dir / "combined"
    outdir.mkdir(parents=True, exist_ok=True)
    for afile in cmip_c01_files:
        final_path = outdir / GOESFilename.from_path(afile).start.strftime(
            "%Y%m%dT%H%M%S_combined.nc")
        if final_path.is_file():
            logging.info("Path exists at %s", final_path)
            continue
        # Load fully into memory so the source file can be closed before
        # the combined dataset is assembled and written out.
        orig = xr.open_dataset(afile, engine="netcdf4").load()
        ds = prep_first_file(orig).load()
        orig.close()
        out = combine_goes_files_at_time(afile, ds)
        ds.close()
        out.attrs["erebos_version"] = __version__
        logging.info("Saving file to %s", final_path)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            out.to_netcdf(final_path, engine="netcdf4")
        out.close()
        logging.info("Done")
Example #7

if __name__ == "__main__":
    logging.basicConfig(format="%(asctime)s %(message)s", level="INFO")

    with open("height.pkl", "rb") as f:
        height_model = pickle.load(f)

    with open("cloud_mask.pkl", "rb") as f:
        mask_model = pickle.load(f)
    with open("cloud_type.pkl", "rb") as f:
        type_model = pickle.load(f)

    site_data = xr.open_dataset("/storage/projects/goes_alg/site_data.nc")
    goes_files = [
        GOESFilename.from_path(f)
        for f in Path("/storage/projects/goes_alg/goes_data/west/CMIP").glob(
            "*MCMIPC*.nc")
    ]

    final_countdown = []
    for gfile in goes_files:
        if gfile.start.hour < 13:
            continue
        logging.info("Processing file from %s", gfile.start)
        with xr.open_dataset(gfile.filename) as goes_ds:
            tomerge = []
            for _, site in site_data.groupby("site"):
                tomerge.append(process_site(goes_ds, site))
            final_countdown.append(xr.merge(tomerge))
    output = xr.merge(final_countdown)
Example #8

import logging
from pathlib import Path

from erebos import prep
from erebos.adapters.goes import GOESFilename

logging.basicConfig(format="%(asctime)s %(levelno)s %(message)s", level="INFO")

calipso_dir = Path("/storage/projects/goes_alg/calipso/west/1km_cloud/")
goes_dir = Path("/storage/projects/goes_alg/goes_data/west/CMIP/")
xml_dir = Path("/storage/projects/goes_alg/goes_data/west/xml/")
product_names = (
    [("ABI-L2-MCMIPC", None)]
    + [("ABI-L2-CMIPC", band) for band in range(1, 17)]
    + [("ABI-L1b-RadC", band) for band in range(1, 17)]
)
prep.download_corresponding_goes_files(
    calipso_dir,
    goes_dir,
    bucket_name="noaa-goes16",
    product_names_bands=product_names,
    checkpoint=True,
    cglob="*D_Sub*.hdf",
)
xml_dir.mkdir(parents=True, exist_ok=True)
# Write a CLASS search/order XML for each channel-1 CMIP granule.
for gfile in goes_dir.glob("*CMIPC*C01*.nc"):
    gcf = GOESFilename.from_path(gfile)
    prep.create_class_search_xml(gcf, xml_dir)
# Remove order XMLs that already have a copy under retrieved/ or
# processing/, leaving only the orders that still need to be placed.
for xml_file in xml_dir.glob("*.xml"):
    if (xml_file.parent / "retrieved" / xml_file.name).exists() or (
            xml_file.parent / "processing" / xml_file.name).exists():
        xml_file.unlink()