示例#1
0
def test_create_dt_fpath_subdir_dt():
    """Check the path built when the sub-directory names are date patterns."""
    timestamp = datetime(2000, 1, 1)
    subdir_patterns = ["%Y", "%m"]

    result = create_dt_fpath(timestamp,
                             "/example",
                             "file%Y%m%dname.nc",
                             subdirs=subdir_patterns)

    # both the sub-directories and the file name carry the date
    expected = "/example/2000/01/file20000101name.nc"
    assert result == expected
示例#2
0
def test_create_dt_fpath_fname_dt():
    """Check the path built when only the file name carries date patterns."""
    timestamp = datetime(2000, 1, 1)
    fixed_subdirs = ["sub1", "sub2"]

    result = create_dt_fpath(timestamp,
                             "/example",
                             "file%Y%m%dname.nc",
                             subdirs=fixed_subdirs)

    # literal sub-directory names are expected to show up unchanged
    expected = "/example/sub1/sub2/file20000101name.nc"
    assert result == expected
def save_gribs_from_grib(
        input_grib,
        output_path,
        product_name,
        filename_templ="{product}_OPER_0001_AN_%Y%m%d_%H%M.grb"):
    """
    Split a downloaded grib file into one grib file per time stamp.

    Every message of ``input_grib`` is appended to the output file that
    belongs to the message's year/month/day/hour, so all messages sharing a
    time stamp end up in the same file.

    Parameters
    ----------
    input_grib : string
        filepath of the downloaded .grb file
    output_path : string
        root directory where the resulting grib files are saved
    product_name : string
        name of the ECMWF model (for filename generation); it replaces
        ``{product}`` in ``filename_templ``
    filename_templ : string, optional
        template for naming each separated grib file; after ``{product}`` is
        filled in it is passed to ``create_dt_fpath`` together with the
        time stamp of the message
    """
    # Sub-directory patterns handed to create_dt_fpath
    # (presumably expanded there to year / day of year).
    localsubdirs = ['%Y', '%j']
    grib_in = pygrib.open(input_grib)

    try:
        grib_in.seek(0)

        # the product name is identical for all messages: fill it in once
        fname = filename_templ.format(product=product_name)

        for grb in grib_in:
            # messages are only distinguished down to the hour
            filedate = datetime(grb['year'], grb['month'], grb['day'],
                                grb['hour'])

            filepath = create_dt_fpath(filedate,
                                       root=output_path,
                                       fname=fname,
                                       subdirs=localsubdirs)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)

            # append mode: several messages may map to the same file
            with open(filepath, 'ab') as grb_out:
                grb_out.write(grb.tostring())
    finally:
        # release the input handle even if writing a message failed
        grib_in.close()
示例#4
0
def save_gribs_from_grib(
    input_grib,
    output_path,
    product_name,
    filename_templ="{product}_AN_%Y%m%d_%H%M.grb",
    keep_original=True,
):
    """
    Split the downloaded grib file into separate files and add them to the
    folder structure necessary for reshuffling.

    Every message of ``input_grib`` is appended to the output file that
    belongs to the message's year/month/day/hour, so all messages sharing a
    time stamp end up in the same file.

    Parameters
    ----------
    input_grib : str
        Filepath of the downloaded .grb file
    output_path : str
        Where to save the resulting grib files
    product_name : str
        Name of the ECMWF model (only for filename generation); it replaces
        ``{product}`` in ``filename_templ``
    filename_templ : str, optional (default: {product}_AN_%Y%m%d_%H%M.grb)
        Template for naming each separated grb file
    keep_original : bool, optional (default: True)
        If False, ``input_grib`` is deleted after all messages were written.
    """
    # Sub-directory patterns handed to create_dt_fpath
    # (presumably expanded there to year / day of year).
    localsubdirs = ["%Y", "%j"]
    grib_in = pygrib.open(input_grib)

    try:
        grib_in.seek(0)

        # the product name is identical for all messages: fill it in once
        fname = filename_templ.format(product=product_name)

        for grb in grib_in:
            # messages are only distinguished down to the hour
            filedate = datetime(grb["year"], grb["month"], grb["day"],
                                grb["hour"])

            filepath = create_dt_fpath(filedate,
                                       root=output_path,
                                       fname=fname,
                                       subdirs=localsubdirs)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)

            # append mode: several messages may map to the same file
            with open(filepath, "ab") as grb_out:
                grb_out.write(grb.tostring())
    finally:
        # release the input handle even if writing a message failed
        grib_in.close()

    # only reached when every message was written successfully
    if not keep_original:
        os.remove(input_grib)
def save_ncs_from_nc(input_nc,
                     output_path,
                     product_name,
                     filename_templ='{product}_{gridsize}_%Y%m%d_%H%M.nc'):
    """
    Split a downloaded netcdf file into one netcdf file per time step.

    Parameters
    ----------
    input_nc : string
        filepath of the downloaded .nc file
    output_path : string
        root directory where the resulting netcdf files are saved
    product_name : string
        name of the ECMWF model (for filename generation); it replaces
        ``{product}`` in ``filename_templ``
    filename_templ : string, optional
        template for naming each separated nc file; ``{gridsize}`` is
        replaced by ``<lat step>_<lon step>`` taken from the first two
        latitude / longitude values of the input file
    """
    # Sub-directory patterns handed to create_dt_fpath
    # (presumably expanded there to year / day of year).
    localsubdirs = ['%Y', '%j']

    # the context manager closes the dataset even if a step fails
    with xr.open_dataset(input_nc, mask_and_scale=True) as nc_in:
        # grid spacing: absolute first difference, rounded to 3 decimals
        latdiff = np.abs(np.round(np.ediff1d(nc_in.latitude.values), 3))[0]
        londiff = np.abs(np.round(np.ediff1d(nc_in.longitude.values), 3))[0]
        gridsize = '%s_%s' % (str(latdiff), str(londiff))

        filename_templ = filename_templ.format(product=product_name,
                                               gridsize=gridsize)

        for timestep in nc_in.time.values:
            subset = nc_in.sel(time=timestep)

            timestamp = pd.Timestamp(timestep).to_pydatetime()
            filepath = create_dt_fpath(timestamp,
                                       root=output_path,
                                       fname=filename_templ,
                                       subdirs=localsubdirs)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)

            subset.to_netcdf(filepath)
示例#6
0
def save_ncs_from_nc(input_nc,
                     output_path,
                     product_name,
                     filename_templ='{product}_AN_%Y%m%d_%H%M.nc'):
    """
    Split the downloaded netcdf file into one file per time step and add
    them to the folder structure necessary for reshuffling.

    Parameters
    ----------
    input_nc : str
        Filepath of the downloaded .nc file
    output_path : str
        Where to save the resulting netcdf files
    product_name : str
        Name of the ECMWF model (only for filename generation); it replaces
        ``{product}`` in ``filename_templ``
    filename_templ : str, optional (default: {product}_AN_%Y%m%d_%H%M.nc)
        Template for naming each separated nc file
    """
    # Sub-directory patterns handed to create_dt_fpath
    # (presumably expanded there to year / day of year).
    localsubdirs = ['%Y', '%j']

    # the context manager closes the dataset even if a step fails
    with xr.open_dataset(input_nc, mask_and_scale=True) as nc_in:
        filename_templ = filename_templ.format(product=product_name)

        for timestep in nc_in.time.values:
            subset = nc_in.sel(time=timestep)
            timestamp = pd.Timestamp(timestep).to_pydatetime()
            filepath = create_dt_fpath(timestamp,
                                       root=output_path,
                                       fname=filename_templ,
                                       subdirs=localsubdirs)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)

            # same compression for all variables
            var_encode = {'zlib': True, 'complevel': 6}
            subset.to_netcdf(
                filepath, encoding={var: var_encode
                                    for var in subset.variables})
示例#7
0
def save_ncs_from_nc(
    input_nc,
    output_path,
    product_name,
    filename_templ="{product}_AN_%Y%m%d_%H%M.nc",
    grid=None,
    keep_original=True,
    remap_method="bil",
):
    """
    Split the downloaded netcdf file into one file per time step and add
    them to the folder structure necessary for reshuffling.

    Parameters
    ----------
    input_nc : str
        Filepath of the downloaded .nc file
    output_path : str
        Where to save the resulting netcdf files
    product_name : str
        Name of the ECMWF model (only for filename generation); it replaces
        ``{product}`` in ``filename_templ``
    filename_templ : str, optional (default: {product}_AN_%Y%m%d_%H%M.nc)
        Template for naming each separated nc file
    grid : dict, optional (default: None)
        Target grid description. Its items are written as ``key = value``
        lines to ``grid.txt`` in ``output_path`` and every time step is
        remapped to that grid with cdo before it is saved. An already
        existing ``grid.txt`` is reused as it is. If None, no remapping is
        done.
    keep_original: bool, optional (default: True)
        keep the original downloaded data; if False, ``input_nc`` is deleted
        after all time steps were written
    remap_method : str, optional (default: "bil")
        Suffix of the cdo ``gen<remap_method>`` operator that creates the
        remapping weights (``remap_weights.nc`` in ``output_path``). An
        already existing weights file is reused.

    Raises
    ------
    CdoNotFoundError
        If ``grid`` is given but ``cdo_available`` is false.
    """
    # Sub-directory patterns handed to create_dt_fpath
    # (presumably expanded there to year / day of year).
    localsubdirs = ["%Y", "%j"]

    # only instantiated when remapping is requested
    cdo = None

    try:
        # the context manager closes the dataset even if a step fails
        with xr.open_dataset(input_nc, mask_and_scale=True) as nc_in:
            filename_templ = filename_templ.format(product=product_name)

            if grid is not None:
                if not cdo_available:
                    raise CdoNotFoundError()

                cdo = Cdo()
                gridpath = os.path.join(output_path, "grid.txt")
                weightspath = os.path.join(output_path, "remap_weights.nc")
                if not os.path.exists(gridpath):
                    # output_path may not exist yet on the first run
                    os.makedirs(output_path, exist_ok=True)
                    with open(gridpath, "w") as f:
                        for k, v in grid.items():
                            f.write(f"{k} = {v}\n")

            for timestep in nc_in.time.values:
                subset = nc_in.sel(time=timestep)
                timestamp = pd.Timestamp(timestep).to_pydatetime()
                filepath = create_dt_fpath(
                    timestamp,
                    root=output_path,
                    fname=filename_templ,
                    subdirs=localsubdirs,
                )
                os.makedirs(os.path.dirname(filepath), exist_ok=True)

                if grid is not None:

                    if not os.path.exists(weightspath):
                        # create weights file
                        getattr(cdo, "gen" + remap_method)(gridpath,
                                                           input=subset,
                                                           output=weightspath)
                    subset = cdo.remap(
                        ",".join([gridpath, weightspath]),
                        input=subset,
                        returnXDataset=True,
                    )

                # same compression for all variables
                var_encode = {"zlib": True, "complevel": 6}
                subset.to_netcdf(
                    filepath, encoding={var: var_encode
                                        for var in subset.variables})

        # only reached when every time step was written successfully
        if not keep_original:
            os.remove(input_nc)
    finally:
        # remove cdo's temporary files even if a step failed
        if cdo is not None:
            cdo.cleanTempDir()