def get_results(flight, config="", group="All quantities"):
    """
    Load the retrieval results from a given flight. Results are
    automatically augmented with the retrieved IWC.

    Args:
        flight: The flight name, i.e. b984, c159 or c161.
        config: Name of the retrieval configuration that defines from which
            sub-folder (if any) the data is loaded.
        group: The NetCDF4 group containing the results.
    """
    flight = flight.lower()
    path = os.path.join(joint_flight.PATH, "data", "old")
    if config != "":
        path = os.path.join(path, config)

    # Raw string and escaped dot so the pattern matches a literal ".nc".
    pattern = re.compile(rf"output_{flight}_([\w-]*)\.nc")
    results = {}
    psd = D14NDmIce()
    for f in glob.glob(os.path.join(path, "*")):
        match = re.match(pattern, os.path.basename(f))
        if match is None:
            continue
        shape = match.group(1)
        # f is already a full path returned by glob, so no need to re-join.
        data = xr.load_dataset(f, group=group)

        dm = data["ice_dm"]
        n0 = data["ice_n0"]
        psd.mass_weighted_diameter = dm
        psd.intercept_parameter = n0
        wc = psd.get_mass_density()
        k = np.ones((5, 1)) / 5.0
        wc_s = convolve(wc, k, mode="same")
        nd = psd.get_moment(0)
        nd.data[wc < 5e-6] = 0.0

        data["ice_water_content"] = (dm.dims, wc.data)
        data["ice_water_content_smooth"] = (dm.dims, wc_s.data)
        data["number_density"] = (dm.dims, nd.data)
        results[shape] = data
    return results
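# A minimal usage sketch for get_results, assuming the joint_flight package and
# its data layout are available; the flight name and printed quantity are
# hypothetical.
if __name__ == "__main__":
    results = get_results("c159")
    for shape, data in results.items():
        print(shape, float(data["ice_water_content"].mean()))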
def load_nc(path, time):
    """Load the netCDF dataset corresponding to the given time.

    This function finds files matching the AERIS :data:`nc_filename`
    formatted for the given time.

    Args:
        path (str): The directory containing GOES netCDF files.
        time (datetime.datetime): The time of the file to load.

    Returns:
        xarray.Dataset: The loaded netCDF file with values filtered
            where the coordinates are NaN.

    Raises:
        FileNotFoundError: If the netCDF is not present.
        FileExistsError: If multiple matching netCDF files are found
            with the same name.
    """
    filename = nc_filename.format(
        year=time.year,
        day=time.timetuple().tm_yday,
        hour=time.hour,
        minute=time.minute,
        something="*",
    )
    # Find matching filenames in the GOES folder
    file_path = list(pathlib.Path(path).rglob(filename))
    if len(file_path) == 0:
        raise FileNotFoundError("No GOES data found in {} for {}".format(
            path, time))
    elif len(file_path) > 1:
        raise FileExistsError("More than one file found in {} for {}".format(
            path, time))

    dataset = xr.load_dataset(str(file_path[0]))
    # Remove values where the coordinates are NaNs
    dataset = dataset.where(~dataset.longitude.isnull(), drop=True)
    return dataset
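# Example call, assuming GOES files live under a hypothetical "/data/goes"
# directory and that the module-level nc_filename template is defined.
import datetime
goes = load_nc("/data/goes", datetime.datetime(2020, 1, 15, 12, 0))
print(goes)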
def load_workflow_result(datasets, loader=xarray.load_dataset, sortby='location'):
    if isinstance(datasets, str):
        if isdir(datasets):
            datasets = glob(join(datasets, "*.nc"))
        else:
            datasets = glob(datasets)
    if len(datasets) == 1:
        # Use the supplied loader here too; sorting is handled below so the
        # single-file path sorts by the same key ('location', not 'locations').
        ds = loader(datasets[0])
    else:
        ds = xarray.concat(map(loader, datasets), dim="location")
    if sortby is not None:
        ds = ds.sortby(sortby)
    return ds
def read_and_concat_smoothFinals(rinexpath, solution='Final'):
    import xarray as xr
    from aux_gps import save_ncfile
    from aux_gps import path_glob
    years = [x.as_posix().split('/')[-1] for x in path_glob(rinexpath, '*/')]
    years = [x for x in years if x.isnumeric()]
    for year in years:
        dsl = []
        # doys = [x.as_posix().split('/')[-1] for x in path_glob(rinexpath/year, '*/')]
        for doypath in path_glob(rinexpath / year, '*/'):
            file = doypath / 'dr' / solution / 'smoothFinal.nc'
            if file.is_file():
                dsl.append(xr.load_dataset(file))
                print('found smoothFinal.nc in {}'.format(doypath))
        if dsl:
            ds = xr.concat(dsl, 'time')
            ds = ds.sortby('time')
            save_ncfile(ds, rinexpath, 'smoothFinal_{}.nc'.format(year))
    return ds
def process_month(self, year, month, output_path):
    files = {
        gn: fs for gn, fs in self.validation_files.items()
        if fs["date"].year == year and fs["date"].month == month
    }
    parts = []
    for gn, fs in files.items():
        match_up_file = fs["match_up_file"]
        gprof_file = fs["gprof_file"]
        qprof_file = fs["qprof_file"]

        match_up_data = xr.load_dataset(match_up_file)
        gprof_data = RetrievalFile(
            gprof_file, has_sensitivity=True).to_xarray_dataset()
        qprof_data = RetrievalFile(qprof_file).to_xarray_dataset()

        match_up_data = match_up_data.stack(samples=("scans", "pixels"))
        gprof_data = gprof_data.stack(samples=("scans", "pixels"))
        qprof_data = qprof_data.stack(samples=("scans", "pixels"))

        valid = np.isfinite(match_up_data["surface_precipitation"])
        valid *= gprof_data["surface_precip"] >= 0.0

        match_up_data = match_up_data.isel(samples=valid).rename(
            {"surface_precipitation": "surface_precip_mrms"})
        gprof_data = gprof_data.isel(samples=valid)
        gprof_data = gprof_data[["surface_precip"]].rename(
            {"surface_precip": "surface_precip_gprof"})
        qprof_data = qprof_data.isel(samples=valid)
        qprof_data = qprof_data[["surface_precip"]].rename(
            {"surface_precip": "surface_precip_qprof"})

        merged = xr.merge([match_up_data, gprof_data, qprof_data])
        parts.append(merged.reset_index("samples"))

    results = xr.concat(parts, dim="samples")
    output_path = Path(output_path)
    if output_path.is_dir():
        # Append a default filename when a directory is given.
        output_path = output_path / f"validation_results_{year}_{month:02}.nc"
    results.to_netcdf(output_path)
def test_goes_mag():
    fname = 'dn_magn-l2-hires_g17_d20211219_v1-0-1.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/dn_magn-l2-hires_g17_d20211219_v1-0-1.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c['coordinate'].attrs['VAR_TYPE'] = 'support_data'
    c['time'].attrs['VAR_TYPE'] = 'support_data'
    c['time_orbit'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf')
    os.remove('dn_magn-l2-hires_g17_d20211219_v1-0-1.nc')
def read_evo(filename):
    """Load an `evo` post-processed Enlil file into an Evolution object.

    Parameters
    ----------
    filename : str
        netcdf Enlil post-processed output file
        Example: evo.earth.nc

    Returns
    -------
    enlil.Evolution
        An Evolution class representing the loaded file.
    """
    ds = xr.load_dataset(filename)

    # Change the dimension to time
    # Depending on which version, the key could be rundate or refdate
    try:
        t0 = np.datetime64(ds.attrs['rundate_cal'], 's')
    except KeyError:
        t0 = np.datetime64(ds.attrs['refdate_cal'], 's')
    time = t0 + np.array(ds['TIME'], np.timedelta64)
    ds = ds.rename({'nevo': 'earth_t'}).assign_coords({'earth_t': time})
    ds = ds.drop(['TIME', 'DT', 'NSTEP'])

    for var in _variables_evo:
        if var not in ds:
            continue
        da = ds[var]
        # Update the name to be consistent across data sets
        name = _variables_evo[var]
        da.name = name
        # Unit conversions
        da = _transform_variable(da)
        ds[name] = da
        ds = ds.drop(var)

    ds.attrs['name'] = ds.label
    return Evolution(ds)
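# Hypothetical usage: load the Earth evolution file named in the docstring
# example and inspect the resulting Evolution object.
evo = read_evo("evo.earth.nc")
print(evo)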
def get_infos(subd):
    files = glob.glob(f"{dirgrid}/{subd:02}/*nc")
    tiles = sorted([int(f.split(".")[1]) for f in files])
    ntiles = len(files)
    grdfile = files[0]
    ds = xr.load_dataset(grdfile)

    headersize = 29400
    stripes = {"size": 177848320, "roundingsize": 1024}
    dims = {"chunk": ntiles // 100 + 1, "tile": 100}
    attrs = ds.attrs
    varlist = list(ds.variables)
    attrs.pop("_NCProperties")
    attrs["partition_ucla"] = [0, 10000, 1, 1]
    varlist.remove("spherical")

    variables = {}
    for name in varlist:
        var = ds.variables[name]
        variables[name] = {"shape": list(var.shape), "dtype": var.dtype.name}
        vattrs = var.attrs
        # NetCDF attribute values come back as numpy scalars; cast them to
        # plain Python types so the info dict stays serializable.
        for key, value in var.attrs.items():
            if isinstance(value, np.float64):
                vattrs[key] = float(value)
            if isinstance(value, np.int32):
                vattrs[key] = int(value)
        variables[name].update(vattrs)

    infos = {}
    infos["headersize"] = headersize
    infos["stripes"] = stripes
    infos["dimensions"] = dims
    infos["variables"] = variables
    infos["attrs"] = attrs
    infos["tiles"] = tiles
    return infos
def test_saber():
    fname = 'SABER_L2B_2021020_103692_02.07.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/SABER_L2B_2021020_103692_02.07.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c['event'].attrs['VAR_TYPE'] = 'support_data'
    c['sclatitude'].attrs['VAR_TYPE'] = 'support_data'
    c['sclongitude'].attrs['VAR_TYPE'] = 'support_data'
    c['scaltitude'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf')
    os.remove('SABER_L2B_2021020_103692_02.07.nc')
def convert_to_rcmip_nc(fname):
    ds = xr.load_dataset(fname)

    # convert years to datetimes; .values yields plain integers rather than
    # 0-d DataArrays, which dt.datetime would reject
    ds["time"] = np.array(
        [dt.datetime(int(y), 1, 1) for y in ds["time"].values],
        dtype="datetime64[s]",
    ).astype("float")

    for v in ds.variables:
        ds_variable = ds.variables[v]
        if v not in ["time", "ensemble_member"]:
            # rename units
            ds_variable.attrs["unit"] = ds_variable.attrs["units"]
            del ds_variable.attrs["units"]
            # Add scenario and model info
            ds_variable.attrs["scenario"] = ds.attrs["scenario"]
            ds_variable.attrs["model"] = ds.attrs["model"]

    # Write out as rcmipII-{sce}-converted.nc
    ds.to_netcdf(os.path.join(root_dir, "{}-converted.nc".format(fname[:-3])))
def load_data(self, **kwargs):
    """Load data to xarray dataset

    :returns: (xarray.Dataset) simulation output data

    """
    # load z and zi
    with netcdf.netcdf_file(self.data, 'r', mmap=False) as ncfile:
        nc_z = ncfile.variables['z']
        nc_zi = ncfile.variables['zi']
        z = xr.DataArray(
            nc_z[0, :, 0, 0],
            dims=('z'),
            coords={'z': nc_z[0, :, 0, 0]},
            attrs={
                'long_name': nc_z.long_name.decode(),
                'units': nc_z.units.decode()
            })
        zi = xr.DataArray(
            nc_zi[0, :, 0, 0],
            dims=('zi'),
            coords={'zi': nc_zi[0, :, 0, 0]},
            attrs={
                'long_name': nc_zi.long_name.decode(),
                'units': nc_zi.units.decode()
            })
    # load other variables
    out = xr.load_dataset(
        self.data,
        drop_variables=['z', 'zi'],
        **kwargs,
    )
    out = out.assign_coords({
        'z': z,
        'zi': zi,
    })
    # assign_coords returns a new object, so the result must be assigned
    # back for the per-variable coordinates to stick
    for var in out.data_vars:
        if 'z' in out.data_vars[var].dims:
            out[var] = out.data_vars[var].assign_coords({'z': z})
        elif 'zi' in out.data_vars[var].dims:
            out[var] = out.data_vars[var].assign_coords({'zi': zi})
    # return a reordered view
    return out.transpose('z', 'zi', 'time', 'lon', 'lat')
def read_input_data(metadata: list, dim: str = 'dataset'):
    """Load data from metadata.

    Read the input data from the list of given data sets. `metadata` is a
    list of metadata containing the filenames to load. The datasets are
    stacked along the `dim` dimension. Returns an xarray.Dataset.
    """
    identifiers = []
    datasets = []
    for info in metadata:
        filename = info['filename']
        dataset = xr.load_dataset(filename)
        datasets.append(dataset)
        identifier = info[dim]
        identifiers.append(identifier)
    stacked_datasets = xr.concat(datasets, dim=dim)
    stacked_datasets[dim] = identifiers
    return stacked_datasets
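# Sketch of how read_input_data consumes its metadata list; the filenames and
# the 'dataset' identifiers below are hypothetical.
metadata = [
    {"filename": "model_a.nc", "dataset": "model_a"},
    {"filename": "model_b.nc", "dataset": "model_b"},
]
stacked = read_input_data(metadata, dim="dataset")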
def deserialize_dataset(data):
    """
    Read an xarray dataset from a byte stream containing the dataset in
    NetCDF format.

    Args:
        data: The bytes object containing the binary data of the NetCDF
            file.

    Returns:
        The deserialized xarray dataset.
    """
    fd, filename = mkstemp()
    try:
        # Write through fdopen so the descriptor from mkstemp is closed
        # rather than leaked.
        with os.fdopen(fd, "wb") as file:
            file.write(data)
        dataset = xr.load_dataset(filename)
    finally:
        Path(filename).unlink()
    return dataset
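# Round-trip sketch: Dataset.to_netcdf returns bytes when no path is given,
# which is the format deserialize_dataset expects. The toy dataset is
# hypothetical.
example = xr.Dataset({"x": ("t", [1.0, 2.0, 3.0])})
payload = example.to_netcdf()
restored = deserialize_dataset(payload)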
def test_MGITM_model():
    fname = 'MGITM_LS180_F130_150615.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/MGITM_LS180_F130_150615.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c = c.rename({'Latitude': 'latitude', 'Longitude': 'longitude'})
    c['longitude'].attrs['VAR_TYPE'] = 'support_data'
    c['latitude'].attrs['VAR_TYPE'] = 'support_data'
    c['altitude'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'MGITM_LS180_F130_150615-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'MGITM_LS180_F130_150615-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('MGITM_LS180_F130_150615-created-from-netcdf-input.cdf')
    os.remove('MGITM_LS180_F130_150615.nc')
def interpolate_vertical(ml_file, inter_file, new_vertical_axis):
    """
    Linearly interpolate all 4D variables of ml_file to the levels of
    new_vertical_axis and save them in inter_file.
    """
    with xr.load_dataset(inter_file) as interpolated:
        reference = [variable for variable in interpolated.variables
                     if len(interpolated[variable].shape) == 4][0]
        with xr.open_dataset(ml_file) as ml:
            for variable in [variable for variable in ml.variables
                             if variable not in interpolated.variables
                             and len(ml[variable].dims) == 4
                             and "lev_2" in ml[variable].dims]:
                try:
                    x = np.array(ml[new_vertical_axis].data)
                    y = np.array(ml[variable].data)
                    interpolated_data = interpolate_1d(
                        interpolated["lev"].data, x, y, axis=1)
                    interpolated[variable] = interpolated[reference].copy(
                        data=interpolated_data)
                    interpolated[variable].attrs = ml[variable].attrs
                except Exception as e:
                    print(variable, e)
        interpolated.to_netcdf(inter_file)
def download(self):
    outfile = self.datasets_root / 'MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
    get_current_timeseries(outfile=outfile)  # made in previous notebook

    xd = xr.load_dataset(outfile)
    df = xd.to_dataframe()
    df = df[[
        'VCUR', 'UCUR', 'WCUR', 'TEMP', 'DEPTH',
        'M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1',
        'M4', 'M6', 'S4', 'MK3', 'MM', 'SSA', 'SA', 'SPD'
    ]]
    df.dropna(subset=self.columns_target, inplace=True)

    # Only keep parts with at most 5 NaNs in the last 48 periods
    has_past = df.SPD.isna().rolling(48).sum() < 5
    df = df[has_past]
    df = df.resample('30T').mean()
    return df
def sample_function(sample, baseFolder=baseFolder, destination=destination,
                    overWrite=False):
    filename = ''.join(['pore_affiliation_', sample, '.nc'])
    path = os.path.join(destination, filename)
    if not os.path.exists(path) or overWrite:
        try:
            # metadata = xr.load_dataset(os.path.join(destination, ''.join(['pore_props_', sample, '.nc'])))
            metadata = xr.load_dataset(
                os.path.join(destination, ''.join(['dyn_data_', sample, '.nc'])))
            relevant_pores = metadata['label'].data
            pore_affiliation, labels = track_pore_affiliation(
                sample, relevant_pores, baseFolder,
                label_im=metadata['label_matrix'].data)
            data = xr.Dataset(
                {'pore_affiliation': ('label', pore_affiliation)},
                coords={'label': labels})
            data.attrs = metadata.attrs
            data.attrs['explanation'] = (
                '1 - top yarn, 2 - bottom yarn, 3 - interlace, 0 - not in contact')
            filename = ''.join(
                ['pore_affiliation_small_interface', sample, '.nc'])
            path = os.path.join(destination, filename)
            data.to_netcdf(path)
            return 'completed'
        except Exception:
            # report the failure without aborting a batch of samples
            return traceback.print_exc()
    else:
        return 'already done'
def fudge_intervals_cbound(file):
    """make data consistent for given bound"""
    values_changed = 0
    ds = xr.load_dataset(file)
    for cbound in ["pf_lower", "pf_upper"]:
        for i in range(1, len(ds.interval)):
            higher_arr = ds[cbound][i, ...].values
            lower_arr = ds[cbound][i - 1, ...].values
            diff = higher_arr - lower_arr
            mask = diff <= 0
            num = np.count_nonzero(mask)
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.001
                ds[cbound][i, ...] = higher_arr
    ds.close()
    ds.to_netcdf(file)
    return values_changed
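# Hypothetical driver for fudge_intervals_cbound: sweep a directory of output
# files and tally the adjustments. The glob pattern and directory are
# assumptions, not part of the original code.
from glob import glob
total = sum(fudge_intervals_cbound(f) for f in glob("out/*.nc"))
print(f"{total} bound value(s) adjusted in total")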
def test_save_netcdf(self):
    """
    Testing for saver done here since we have a model to integrate so
    it's easy.
    """
    system = BackendTestingHelper()
    results = system.run(
        self.DURATION, self.DT,
        ZeroInput().as_cubic_splines(self.DURATION, self.DT))
    results.attrs = self.EXTRA_ATTRS
    # save to netCDF
    nc_name = os.path.join(self.TEST_DIR, "netcdf_test")
    save_to_netcdf(results, nc_name)
    # actual data
    self.assertTrue(os.path.exists(nc_name + ".nc"))
    # metadata
    self.assertTrue(os.path.exists(nc_name + ".json"))
    # load and check
    loaded = xr.load_dataset(nc_name + ".nc")
    with open(nc_name + ".json", "r") as f:
        attrs = json.load(f)
    loaded.attrs = attrs
    xr.testing.assert_equal(results, loaded)
    self.assertDictEqual(loaded.attrs, self.EXTRA_ATTRS)
def process_IMS_data_at_station_on_dsea_foehn_dates(
        ims_path=ims_path, station='SEDOM', path=des_path,
        times=['2014-08-01', '2014-08-31']):
    import xarray as xr
    import pandas as pd
    import numpy as np
    # import matplotlib.pyplot as plt
    ws = xr.open_dataset(
        ims_path / 'IMS_WS_israeli_10mins.nc')[station].sel(time=slice(*times))
    wd = xr.open_dataset(
        ims_path / 'IMS_WD_israeli_10mins.nc')[station].sel(time=slice(*times))
    rh = xr.open_dataset(
        ims_path / 'IMS_RH_israeli_10mins.nc')[station].sel(time=slice(*times))
    ts = xr.open_dataset(
        ims_path / 'IMS_TD_israeli_10mins.nc')[station].sel(time=slice(*times))
    ds = xr.Dataset()
    ds['WS'] = ws
    ds['WD'] = wd
    ds['T'] = ts
    ds['RH'] = rh
    # convert to UTC, since IMS is always UTC+2
    new_time = ds['time'] - pd.Timedelta(2, unit='h')
    new_time = new_time.dt.round('s')
    ds['time'] = new_time
    ds['PWV'] = xr.load_dataset(path / 'DSEA_PWV_GNSS_2014-08.nc')['pwv-soi']
    da8 = ds.sel(time=slice('2014-08-08T13:00:00',
                            '2014-08-08T19:00:00')).to_array('var')
    da8['time'] = np.linspace(13, 19, len(da8['time']))
    # da8['time'] = da8['time'].dt.time
    da16 = ds.sel(time=slice('2014-08-16T13:00:00',
                             '2014-08-16T19:00:00')).to_array('var')
    da16['time'] = np.linspace(13, 19, len(da16['time']))
    # da16['time'] = da16['time'].dt.time
    dss = xr.concat([da8, da16], 'date')
    # dss['date'] = [pd.to_datetime(x).date() for x in ['2014-08-08', '2014-08-16']]
    dss['date'] = ['2014-08-08', '2014-08-16']
    return dss
def prepare_for_go(self):
    logger.debug('prepare_for_go...')
    try:
        # input is either going to be a netcdf file or a list of rmc6f files
        data = xr.load_dataset(self.input)
        assert 'rmcalyse_version' in data.attrs.keys()
        logger.info('successfully loaded data from netcdf file {}'.format(
            self.input))
        self.data = data
    except FileNotFoundError as e:
        logger.debug('File {} not found: {}'.format(self.input, e))
        raise
    except (OSError, AssertionError, AttributeError):
        logger.debug(
            'File {} is not a valid rmcalyse/netcdf4 file. trying rmc6f loader...'
            .format(self.input))
        try:
            self._load_rmc6f_files()
        except Exception as e:
            logger.error(
                'there was an unhandled error in loading {}: {}'.format(
                    self.input, e))
def saveDataToFile(self, source):
    """
      Saves the given data as database to file.
      @ In, source, DataObjects.DataObject, object to write to file
      @ Out, None
    """
    ds, meta = source.getData()
    # we actually just tell the DataSet to write out as netCDF
    path = self.get_fullpath()
    # TODO set up to use dask for on-disk operations
    # convert metadata into writeable
    for key, xml in meta.items():
        ds.attrs[key] = xmlUtils.prettify(xml.getRoot())
    # get rid of "object" types
    for var in ds:
        if ds[var].dtype == np.dtype(object):
            # is it a string?
            if mathUtils.isAString(ds[var].values[0]):
                ds[var] = ds[var].astype(str)
    # is there existing data? Read it in and merge it, if so
    # -> we've already wiped the file in initializeDatabase if it's in write mode
    if os.path.isfile(path):
        exists = xr.load_dataset(path)
        if 'RAVEN_sample_ID' in exists:
            floor = int(exists['RAVEN_sample_ID'].values[-1]) + 1
            new = ds['RAVEN_sample_ID'].values + floor
            ds = ds.assign_coords(RAVEN_sample_ID=new)
        # NOTE order matters! This preserves the sampling order in which data
        # was inserted into this database
        ds = xr.concat((exists, ds), 'RAVEN_sample_ID')
    # if this is open somewhere else, we can't write to it
    # TODO is there a way to check if it's writable? I can't find one ...
    try:
        ds.to_netcdf(path, engine=self._format)
    except PermissionError:
        self.raiseAnError(
            PermissionError,
            f'NetCDF file "{path}" denied RAVEN permission to write! Is it open in another program?')
def read_multiple_strain_netcdfs(MyParams, plot_type):
    """
    Get all the models (e.g. gpsgridder, geostats, huang, etc.) that have
    computed plot_type of strain rate and return them as a single xarray
    Dataset.

    Parameters
    ----------
    MyParams: object - parameter object whose strain_dict attribute maps
        strain rate methods to their directories
    plot_type: str - the type of strain rate quantity to return. Can be
        max_shear, dilatation, etc.

    Returns
    -------
    ds_new: xarray Dataset - a dataset containing the plot_type variable
        from each type of model
    """
    building_dict = {}
    for method in MyParams.strain_dict.keys():
        specific_filename = os.path.join(MyParams.strain_dict[method],
                                         "{}_strain.nc".format(method))
        ds = xr.load_dataset(specific_filename)
        building_dict[method] = ds[plot_type]
    ds_new = xr.Dataset(building_dict, coords=ds.coords)
    return ds_new
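# Hypothetical call: a lightweight stand-in for MyParams carrying the
# strain_dict attribute the function expects; the method name and directory
# are assumptions.
from types import SimpleNamespace
params = SimpleNamespace(strain_dict={"gpsgridder": "Results/gpsgridder"})
max_shear_ds = read_multiple_strain_netcdfs(params, "max_shear")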
def iterate_intervals():
    print(" Iterating over intervals...", flush=True)
    values_changed = 0
    # Since intervals are stored within each file, this process is
    # more straightforward than iterating over durations since all the info
    # we need to compare interval values is contained within a single file.
    # We just have to work on each file in turn.
    # Since the first iteration over intervals occurs after the iteration over
    # durations, we can also just load from the output directory since the
    # durations step would have moved everything into there.
    files = glob(os.path.join(out_path, f"*_{data_group}_*.nc"))
    for file in files:
        print(f" {os.path.basename(file)}", flush=True)
        ds = xr.load_dataset(file)
        for i in range(1, len(ds.interval)):
            # an index may contain only a single ellipsis, so [i, ...]
            # rather than [i, ..., ...]
            higher_arr = ds['pf'][i, ...].values
            lower_arr = ds['pf'][i - 1, ...].values
            diff = higher_arr - lower_arr
            mask = diff <= 0
            num = np.count_nonzero(mask)
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.01
                ds['pf'][i, ...] = higher_arr
        ds.close()
        ds.to_netcdf(file)
    return values_changed
def get_weights():
    ds = xr.load_dataset("weights.nc")
    n_s = ds.dims['n_s']
    # row/col indices are stored 1-based; shift to 0-based for scipy
    col = ds['col'].values - 1
    row = ds['row'].values - 1
    S = ds['S'].values
    A = coo_matrix((S, (row, col)))
    if args.type == 'csc':
        A = A.tocsc()
    elif args.type == 'csr':
        A = A.tocsr()
    elif args.type == 'coo':
        pass
    else:
        raise ValueError(f"unknown sparse matrix type: {args.type}")
    return A
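# Sketch of applying the weights: multiplying the sparse matrix by a flattened
# source field yields the flattened destination field. The random input is
# purely illustrative.
import numpy as np
A = get_weights()
src = np.random.rand(A.shape[1])
dst = A @ src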
def _set_cache_from_netcdf(cls, ds: DataSetInMem, xr_path: Optional[str]) -> bool:
    import xarray as xr

    success = True
    if xr_path is not None:
        try:
            loaded_data = xr.load_dataset(xr_path, engine="h5netcdf")
            ds._cache = DataSetCacheInMem(ds)
            ds._cache._data = cls._from_xarray_dataset_to_qcodes_raw_data(
                loaded_data)
        except (
            FileNotFoundError,
            OSError,
        ):  # older versions of h5py may throw a OSError here
            success = False
            warnings.warn("Could not load raw data for dataset with guid :"
                          f"{ds.guid} from location {xr_path}")
    else:
        warnings.warn(
            f"No raw data stored for dataset with guid : {ds.guid}")
        success = False
    return success
def produce_pwv_from_dsea_axis_station(path=axis_path, ims_path=ims_path):
    """use axis_path = work_yuval/dsea_gispyx for original soi-apn dsea
    station"""
    import xarray as xr
    from aux_gps import transform_ds_to_lat_lon_alt
    from aux_gps import get_unique_index
    ds = xr.load_dataset(path / 'smoothFinal_2014.nc').squeeze()
    ds = get_unique_index(ds)
    # for now cut:
    if 'axis' in path.as_posix():
        ds = ds.sel(time=slice(None, '2014-08-12'))
    ds = transform_ds_to_lat_lon_alt(ds)
    axis_zwd = ds['WetZ']
    ts = xr.open_dataset(ims_path / 'IMS_TD_israeli_10mins.nc')['SEDOM']
    axis_pwv = produce_pwv_from_zwd_with_ts_tm_from_deserve(ts=ts, zwd=axis_zwd)
    if 'axis' in path.as_posix():
        axis_pwv.name = 'AXIS-DSEA'
    else:
        axis_pwv.name = 'SOI-DSEA'
    axis_pwv.attrs['lat'] = ds['lat'].values[0]
    axis_pwv.attrs['lon'] = ds['lon'].values[0]
    axis_pwv.attrs['alt'] = ds['alt'].values[0]
    return axis_pwv
def __init__(self, path):
    # waiting time statistics
    self.delta_t_025 = np.array([])
    self.delta_t_100 = np.array([])
    self.delta_t_300 = np.array([])
    self.delta_t_all = np.array([])

    print('Reading the statistics dataset at {}'.format(path))
    stats_dataset = xr.load_dataset(path)

    for key in list(stats_dataset.coords):
        if key[-2:] != '_t':
            continue
        if key[3:6] == '025':
            self.delta_t_025 = np.concatenate(
                [self.delta_t_025, stats_dataset[key].data])
        if key[3:6] == '100':
            self.delta_t_100 = np.concatenate(
                [self.delta_t_100, stats_dataset[key].data])
        if key[3:6] == '300':
            self.delta_t_300 = np.concatenate(
                [self.delta_t_300, stats_dataset[key].data])
    self.delta_t_all = stats_dataset['deltatall'].data
def get_pressure_lapse_rate(path=ims_path, model='LR', plot=False):
    from aux_gps import linear_fit_using_scipy_da_ts
    import matplotlib.pyplot as plt
    import xarray as xr
    from aux_gps import keep_iqr
    bp = xr.load_dataset(ims_path / 'IMS_BP_israeli_10mins.nc')
    bps = [keep_iqr(bp[x]) for x in bp]
    bp = xr.merge(bps)
    mean_p = bp.mean('time').to_array('alt')
    mean_p.name = 'mean_pressure'
    alts = [bp[x].attrs['station_alt'] for x in bp.data_vars]
    mean_p['alt'] = alts
    _, results = linear_fit_using_scipy_da_ts(
        mean_p, model=model, slope_factor=1, not_time=True)
    slope = results['slope']
    inter = results['intercept']
    modeled_var = slope * mean_p['alt'] + inter
    if plot:
        fig, ax = plt.subplots()
        modeled_var.plot(ax=ax, color='r')
        mean_p.plot.line(linewidth=0., marker='o', ax=ax, color='b')
        # lr = 1000 * abs(slope)
        textstr = 'Pressure lapse rate: {:.1f} hPa/km'.format(1000 * slope)
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        # place a text box in upper left in axes coords
        ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', bbox=props)
        ax.set_xlabel('Height a.s.l [m]')
        ax.set_ylabel('Mean Pressure [hPa]')
    return results
def perform_pwv_filling_last_decade(path=work_yuval, fyear='2009',
                                    lyear='2019', drop=['slom', 'elro']):
    import xarray as xr
    from aux_gps import save_ncfile
    pw = xr.load_dataset(path / 'GNSS_PW_monthly_thresh_50.nc')
    pw = pw.sel(time=slice(fyear, lyear))
    pw = pw.drop_vars(drop)
    prepare_pwv_for_climatol(freq='monthly', first_year=fyear,
                             last_year=lyear, pwv_ds=pw)
    # then run these two lines in R:
    # homogen('PWV',2009,2019, na.strings="-999.9",dz.max=7,std=2)
    # dahstat('PWV',2009,2019,stat='series',long=TRUE)
    ds, ds_flag = read_climatol_results(first_year=fyear, last_year=lyear)
    filename = 'GNSS_PW_monthly_homogenized_filled_{}-{}.nc'.format(
        fyear, lyear)
    save_ncfile(ds, path, filename)
    filename = 'GNSS_PW_monthly_homogenized_filled_flags_{}-{}.nc'.format(
        fyear, lyear)
    save_ncfile(ds_flag, path, filename)
    return