Example #1
    def __init__(self,
                 dataset_name,
                 img_res=(512, 512),
                 downsize_factor=(4, 4),
                 local_path=None):
        self.dataset_name = dataset_name
        self.img_res = img_res
        self.downsize_factor = downsize_factor
        self.use_local_data = True

        if self.use_local_data:
            # load from test dataset
            assert local_path is not None
            #sst_dataset_path = local_path + "/{}.npz".format(self.dataset_name)
            self.sst_dataset_path = local_path
        else:
            #sst_dataset_path = os.path.join(SST_DATASETS_PATH, "{}.npz".format(dataset_name))
            self.sst_dataset_path = SST_DATASETS_PATH
            if not os.path.exists(SST_DATASETS_PATH):
                try:
                    os.makedirs(SST_DATASETS_PATH)
                except OSError:
                    print("failed to create " + SST_DATASETS_PATH)
            uris = [
                'gcs://pangeo-ocean-ml/LLC4320/SST.{id:010d}.zarr'.format(
                    id=tstep) for tstep in range(0, 4088 + 1, 73)
            ][:]
            #uris = [f'gcs://pangeo-ocean-ml/LLC4320/SST.{tstep:010d}.zarr' for tstep in range(0, 4088+1, 73)][:10]
            dsets = [
                xr.open_zarr(fsspec.get_mapper(uri), consolidated=True)
                for uri in uris
            ]
            ds = xr.combine_nested(dsets, 'timestep')
            print(ds)
            # use ds.SST[0] to compute the NaN count, applied to all timesteps
            num_nans = ds.SST[0].isnull().sum(dim=['x', 'y']).load()
            sst_valid = ds.SST.where(num_nans == 0, drop=True)
            print(sst_valid)
            sst_coarse = sst_valid.coarsen(x=self.downsize_factor[0],
                                           y=self.downsize_factor[1]).mean()
            print(sst_coarse)

            temp = sst_valid[0][0].load().values
            length = (img_res[0] * img_res[1]
                      + img_res[0] // downsize_factor[0] * img_res[1] // downsize_factor[1])
            print(length)
            hdf5_path = SST_DATASETS_PATH + "/output.hdf5"
            # like ordinary file handling, 'w', 'r' and 'a' mean write, read and append
            hdf5_file = tables.open_file(hdf5_path, mode='w')
            # set the compression level and compression method
            filters = tables.Filters(complevel=5, complib='blosc')
            earray = hdf5_file.create_earray(
                hdf5_file.root,
                'data',  # name of the array; used later to access the data
                tables.Atom.from_dtype(temp.dtype),  # set the data type (same as the sample array)
                shape=(0, length),  # 0 in the first dimension means the array is extendable along rows
                filters=filters,
                expectedrows=15000  # approximate final size; helps PyTables use time and space efficiently
            )

            for timestep in range(sst_valid.shape[0]):
                for region in range(sst_valid.shape[1]):
                    # hr.append(sst_valid[timestep, region].load().values)
                    # lr.append(sst_coarse[timestep, region].load().values)
                    hr = sst_valid[timestep, region].load().values
                    lr = sst_coarse[timestep, region].load().values
                    hr = hr.flatten()
                    lr = lr.flatten()
                    temp = np.append(hr, lr).reshape((1, -1))
                    print(temp.shape)
                    earray.append(temp)
            print("got values!")
            hdf5_file.close()

            #np.savez(sst_dataset_path, name1=np.array(hr), name2=np.array(lr))
            print("hdf5 successfully saved")
Example #2
def readwrf(filein):
    """
    This function reads wrfout files, grabs the variables of interest and outputs them as an xarray Dataset.

    Parameters
    ----------
    filein: path to the wrfout netCDF files; declared file name used to write the zarr store

    Returns
    -------
    ds_wrf: an xarray Dataset (zarr) of wind speed (km h-1), temp (degC) & rh (%)
    """
    ds_list = []
    pathlist = sorted(Path(filein).glob('wrfout_d01*'))
    # print(pathlist)
    for path in pathlist:
        path_in_str = str(path)
        wrf_file = Dataset(path_in_str, 'r')

        slp = getvar(wrf_file, "slp")
        rh = getvar(wrf_file, "rh2")
        temp = getvar(wrf_file, "T2")
        wsp_wdir = g_uvmet.get_uvmet10_wspd_wdir(wrf_file, units='m s-1')

        rain_c = getvar(wrf_file, "RAINC")
        rain_sh = getvar(wrf_file, "RAINSH")
        rain_nc = getvar(wrf_file, "RAINNC")

        # cord = get_cartopy(rh)
        # lat,lon = latlon_coords(rh)

        var_list = [slp, rh, temp, wsp_wdir, rain_c, rain_sh, rain_nc]
        # var_list = [rh]
        ds = xr.merge(var_list)
        # cord_list.append(cord)
        # lat_list.append(lat)
        # lon_list.append(lon)
        ds_list.append(ds)

    ds_wrf = xr.combine_nested(ds_list, 'time')

    # cord = cord_list[0]
    # lat, lon = lat_list[0], lon_list[0]

    # out_dir = str(context.data_dir)
    # out_dir = Path(str(context.data_dir)+str('/xr/') + str('/') +  str(ds_name) + str(f".zarr"))
    # out_dir.mkdir(parents=True, exist_ok=True)

    # # now = datetime.now() # current date and time
    # # folder_date = now.strftime("%Y%m%d")
    # # file_date = now.strftime("%Y%m%d_%H")
    # # print("date and time:",file_date)

    # ## Write and save DataArray (.zarr) file
    # # full_dir = str(out_dir) + str('/') +  str(ds_name) + str(f".zarr")

    # ds_wrf.compute()
    # ds_wrf.to_zarr(out_dir, "w")
    # print(f"wrote {out_dir}")

    return ds_wrf
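
For reference, the combine step above reduces to a minimal sketch with toy per-file datasets, assuming all files share the same spatial coordinates:

import numpy as np
import xarray as xr

# two single-time snapshots standing in for individual wrfout files
snapshots = [xr.Dataset({"T2": (("y", "x"), np.random.rand(3, 4))}) for _ in range(2)]

# concatenate along a new 'time' dimension, in the order the files were read
ds_wrf = xr.combine_nested(snapshots, concat_dim="time")
print(ds_wrf.sizes)  # time: 2, y: 3, x: 4
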
Example #3
 def test_nested_concat_too_many_dims_at_once(self):
     objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
     with pytest.raises(ValueError, match="not equal across datasets"):
         combine_nested(objs, concat_dim="x", coords="minimal")
Example #4
 def test_combine_coords_join_exact(self):
     objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     with pytest.raises(ValueError, match=r"indexes along dimension"):
         combine_nested(objs, concat_dim="x", join="exact")
Example #5
def open_mf_wrf_dataset(paths,
                        chunks=None,
                        compat='no_conflicts',
                        lock=None,
                        preprocess=None):
    """Open multiple WRF files as a single WRF dataset.

    Requires dask to be installed. Note that if your files are sliced by time,
    certain diagnostic variables computed from accumulated variables (e.g.
    PRCP) won't be available, because they cannot be computed lazily.

    This code is adapted from xarray's open_mfdataset function. The xarray
    license is reproduced in the salem/licenses directory.

    Parameters
    ----------
    paths : str or sequence
        Either a string glob in the form `path/to/my/files/*.nc` or an
        explicit list of files to open.
    chunks : int or dict, optional
        Dictionary with keys given by dimension names and values given by chunk
        sizes. In general, these should divide the dimensions of each dataset.
        If int, chunk each dimension by ``chunks`` .
        By default, chunks will be chosen to load entire input files into
        memory at once. This has a major impact on performance: please see
        xarray's full documentation for more details.
    compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional
        String indicating how to compare variables of the same name for
        potential conflicts when merging:

        - 'broadcast_equals': all values must be equal when variables are
          broadcast against each other to ensure common dimensions.
        - 'equals': all values and dimensions must be the same.
        - 'identical': all values, dimensions and attributes must be the
          same.
        - 'no_conflicts': only values which are not null in both datasets
          must be equal. The returned dataset then contains the combination
          of all non-null values.
    preprocess : callable, optional
        If provided, call this function on each dataset prior to concatenation.
    lock : False, True or threading.Lock, optional
        This argument is passed on to :py:func:`dask.array.from_array`. By
        default, a per-variable lock is used when reading data from netCDF
        files with the netcdf4 and h5netcdf engines to avoid issues with
        concurrent access when using dask's multithreaded backend.

    Returns
    -------
    xarray.Dataset
    """

    if isinstance(paths, basestring):
        paths = sorted(glob(paths))
    if not paths:
        raise IOError('no files to open')

    # TODO: current workaround to dask thread problems
    import dask
    dask.config.set(scheduler='single-threaded')

    if lock is None:
        lock = NETCDF4_PYTHON_LOCK
    datasets = [
        open_wrf_dataset(p, chunks=chunks or {}, lock=lock) for p in paths
    ]
    file_objs = [ds._file_obj for ds in datasets]

    if preprocess is not None:
        datasets = [preprocess(ds) for ds in datasets]
    try:
        combined = xr.combine_nested(datasets,
                                     concat_dim='time',
                                     compat=compat)
    except AttributeError:
        combined = xr.auto_combine(datasets, concat_dim='time', compat=compat)
    combined._file_obj = _MultiFileCloser(file_objs)
    combined.attrs = datasets[0].attrs

    # drop accumulated vars if needed (TODO: make this not hard coded)
    vns = ['PRCP', 'PRCP_C', 'PRCP_NC']
    vns = [vn for vn in vns if vn in combined.variables]
    try:
        combined = combined.drop_vars(vns)
    except AttributeError:
        combined = combined.drop(vns)

    return combined
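
A hedged usage sketch of open_mf_wrf_dataset; the glob pattern and chunking below are illustrative only:

# hypothetical invocation, assuming the WRF output lives under this path
wrf = open_mf_wrf_dataset('path/to/wrfout_d01_*.nc', chunks={'time': 1})
print(wrf)
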
Example #6
    def time_combine_nested(self):
        datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]]

        xr.combine_nested(datasets, concat_dim=[None, "T"])
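
A self-contained version of the benchmark above, assuming the A/B datasets differ in variables across the outer list and in the T coordinate along the inner lists:

import numpy as np
import xarray as xr

dsA0 = xr.Dataset({"A": ("T", np.arange(3))}, coords={"T": [0, 1, 2]})
dsA1 = xr.Dataset({"A": ("T", np.arange(3))}, coords={"T": [3, 4, 5]})
dsB0 = xr.Dataset({"B": ("T", np.arange(3))}, coords={"T": [0, 1, 2]})
dsB1 = xr.Dataset({"B": ("T", np.arange(3))}, coords={"T": [3, 4, 5]})

# outer level (None) is merged, inner level is concatenated along "T",
# so A and B end up side by side on a single T axis of length 6
combined = xr.combine_nested([[dsA0, dsA1], [dsB0, dsB1]], concat_dim=[None, "T"])
print(combined.sizes)  # T: 6
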
Example #7
def test_combine_echodata(raw_datasets):
    (
        files,
        sonar_model,
        xml_file,
        concat_dims,
        concat_data_vars,
    ) = raw_datasets
    eds = [echopype.open_raw(file, sonar_model, xml_file) for file in files]
    combined = echopype.combine_echodata(eds,
                                         "overwrite_conflicts")  # type: ignore

    for group_name in combined.group_map:
        if group_name in ("top", "sonar", "provenance"):
            continue
        combined_group: xr.Dataset = getattr(combined, group_name)
        eds_groups = [
            getattr(ed, group_name) for ed in eds
            if getattr(ed, group_name) is not None
        ]

        def union_attrs(datasets: List[xr.Dataset]) -> Dict[str, Any]:
            """
            Merges attrs from a list of datasets.
            Prioritizes keys from later datasets.
            """

            total_attrs = {}
            for ds in datasets:
                total_attrs.update(ds.attrs)
            return total_attrs

        test_ds = xr.combine_nested(
            eds_groups,
            [concat_dims.get(group_name, concat_dims["default"])],
            data_vars=concat_data_vars.get(group_name,
                                           concat_data_vars["default"]),
            coords="minimal",
            combine_attrs="drop",
        )
        test_ds.attrs.update(union_attrs(eds_groups))
        test_ds = test_ds.drop_dims(
            [
                "concat_dim",
                "old_ping_time",
                "ping_time",
                "old_time1",
                "time1",
                "old_time2",
                "time2",
            ],
            errors="ignore",
        ).drop_dims([f"{group}_attrs" for group in combined.group_map],
                    errors="ignore")
        assert combined_group is None or test_ds.identical(
            combined_group.drop_dims(
                [
                    "old_ping_time",
                    "ping_time",
                    "old_time1",
                    "time1",
                    "old_time2",
                    "time2",
                ],
                errors="ignore",
            ))
Example #8
    def make_cherab_image(self):
        """
        run cherab to generate the synthetic spectral cube
        :return:
        """
        if self.radiance is not NotImplemented:
            self.radiance.close()
        if self.spectral_radiance is not NotImplemented:
            self.spectral_radiance.close()

        import_mastu_mesh(self.world, )

        # first, define camera, calculate view vectors and calculate ray lengths
        pipeline_spectral = SpectralPowerPipeline2D()
        pipeline_spectral_rad = SpectralRadiancePipeline2D()
        pipelines = [pipeline_spectral, pipeline_spectral_rad, ]
        camera = PinholeCamera(self.sensor_format_ds, fov=self.fov, pipelines=pipelines, parent=self.world)

        # orient and position the camera
        init_view_vector, init_up_vector = Vector3D(0, 0, 1), Vector3D(0, 1, 0)
        axle_1 = init_view_vector.cross(self.view_vector)
        angle = init_view_vector.angle(self.view_vector)
        t_1 = rotate_vector(angle, axle_1)

        final_up_vector = rotate_vector(-90, axle_1) * self.view_vector
        intermediate_up_vector = t_1 * init_up_vector
        angle_between = intermediate_up_vector.angle(final_up_vector)
        t_2 = rotate_vector(-angle_between, self.view_vector)

        camera.transform = translate(self.pupil_point[0],
                                     self.pupil_point[1],
                                     self.pupil_point[2], ) * t_2 * t_1

        vector_xyz = np.arange(3)
        vector_xyz = xr.DataArray(vector_xyz, coords=(vector_xyz, ), dims=('vector_xyz',), name='vector_xyz', )

        # calculating the pixel view directions
        view_vectors = xr.combine_nested(
            [xr.zeros_like(self.x_pixel_ds + self.y_pixel_ds) + self.view_vector[i] for i in [0, 1, 2, ]],
            concat_dim=(vector_xyz,), )
        view_vectors = view_vectors.rename('view_vectors')

        def v3d2da(v3d):
            """
            raysect Vector3D to xarray DataArray

            :param v3d:
            :return:
            """
            da = np.array([v3d.x, v3d.y, v3d.z, ])
            da = xr.DataArray(da, coords=(np.arange(3),), dims=('vector_xyz',), )
            return da

        # basis unit vectors defining camera view -- v_z is forward and v_y is up
        v_y = final_up_vector.normalise()
        v_x = self.view_vector.cross(v_y).normalise()
        v_z = self.view_vector.normalise()
        v_x, v_y, v_z = [v3d2da(i) for i in [v_x, v_y, v_z, ]]

        # FOV defines the widest view, with pixels defined as square.
        sensor_aspect = self.sensor_format[1] / self.sensor_format[0]
        if sensor_aspect > 1:
            fov_v = self.fov
            fov_h = self.fov / sensor_aspect
        elif sensor_aspect == 1:
            fov_v = fov_h = self.fov
        elif sensor_aspect < 1:
            fov_h = self.fov
            fov_v = self.fov * sensor_aspect
        else:
            raise Exception()

        pixel_projection = 2 * np.tan(fov_h * np.pi / 360) / self.sensor_format[0]
        view_vectors = view_vectors + (v_x * (self.x_pixel_ds - self.sensor_format[0] / 2 + 0.5) * pixel_projection) + \
                       (v_y * (self.y_pixel_ds - self.sensor_format[1] / 2 + 0.5) * pixel_projection)

        if self.verbose:
            print('--status: calculating ray lengths')
        # TODO there has to be a better way of doing this?!
        ray_lengths = xr.DataArray(np.zeros(self.sensor_format_ds), dims=('x', 'y', ), coords=(self.x_ds, self.y_ds, ))
        for idx_x, x_pixel in enumerate(self.x_pixel_ds.values):
            if self.verbose and idx_x % 10 == 0:
                print('x =', str(x_pixel))
            for idx_y, y_pixel in enumerate(self.y_pixel_ds.values):
                direction = Vector3D(*list(view_vectors.isel(x=idx_x, y=idx_y, ).values))

                intersections = []
                for p in self.world.primitives:
                    intersection = p.hit(CoreRay(self.pupil_point, direction, ))
                    if intersection is not None:
                        intersections.append(intersection)

                # find the intersection corresponding to the shortest ray length
                no_intersections = len(intersections)
                if no_intersections == 0:
                    ray_lengths.values[idx_x, idx_y] = 3
                else:
                    ray_lengths.values[idx_x, idx_y] = min([i.ray_distance for i in intersections if i.primitive.name != 'Plasma Geometry'])

        camera.spectral_bins = 40
        camera.pixel_samples = 10
        camera.min_wavelength = self.wl_min_nm
        camera.max_wavelength = self.wl_max_nm
        camera.quiet = not self.verbose
        camera.observe()

        # output to netCDF via xarray
        wl = pipeline_spectral.wavelengths
        wl = xr.DataArray(wl, coords=(wl, ), dims=('wavelength', )) * 1e-9  # ( m )
        spec_power_ds = pipeline_spectral.frame.mean * 1e9  # converting units from (W/nm) --> (W/m)
        spec_radiance_ds = pipeline_spectral_rad.frame.mean * 1e9
        coords = (self.x_ds, self.y_ds, wl, )
        dims = ('x', 'y', 'wavelength', )
        name = 'spec_power'
        attrs = {'units': 'W/m^2/str/m'}
        spec_power_ds = xr.DataArray(np.flip(spec_power_ds, axis=1), coords=coords, dims=dims, name=name, attrs=attrs, )
        spec_radiance_ds = xr.DataArray(np.flip(spec_radiance_ds, axis=1, ), coords=coords, dims=dims, name=name, attrs=attrs, )

        # calculate the centre-of-mass wavelength
        radiance_ds = spec_power_ds.integrate(dim='wavelength').assign_attrs({'units': 'W/m^2/str', })

        ds_ds = xr.Dataset({'spectral_radiance_ds': spec_radiance_ds,
                            'radiance_ds': radiance_ds,
                            'view_vectors_ds': view_vectors,
                            'ray_lengths_ds': ray_lengths
                            })

        x_p_y = self.x + self.y
        spec_power = spec_power_ds.interp_like(x_p_y) / self.cherab_down_sample  # to conserve power
        ds = xr.Dataset({'spectral_radiance': spec_power, })
        ds_ds.to_netcdf(self.fpath_ds, mode='w', )
        ds.to_netcdf(self.fpath, mode='w', )
Example #9
def mfpad(dataIn, thres = 1e-2, inds = {'Type':'L','it':1}, res = 50, R = None, p = 0):
    """

    Parameters
    ----------
    dataIn : Xarray
        Contains set(s) of matrix elements to use, as output by epsproc.readMatEle().

    thres : float, optional, default 1e-2
        Threshold value for matrix elements to use in calculation.

    inds : dictionary, optional.
        Used for sub-selection of matrix elements from Xarrays.
        Default set for length gauge, single it component only, inds = {'Type':'L','it':1}.

    res : int, optional, default 50
        Resolution for output (theta,phi) grids.

    R : list of Euler angles or quaternions, optional.
        Define LF > MF polarization geometry/rotations.
        For default case (R = None), 3 geometries are calculated, corresponding to z-pol, x-pol and y-pol cases.
        Defined by Euler angles (p,t,c) = [0 0 0] for z-pol, [0 pi/2 0] for x-pol, [pi/2 pi/2 0] for y-pol.

    p : int, optional.
        Defines LF polarization state, p = -1...1, default p = 0 (linearly pol light along z-axis).
        TODO: add summation over p for multiple pol states in LF.

    Returns
    -------
    Ta
        Xarray (theta, phi, E, Sym) of MFPADs, summed over (l,m)

    Tlm
        Xarray (theta, phi, E, Sym, lm) of MFPAD components, expanded over all (l,m)

    """

    # Define reduced data from selection over all data
    daRed = matEleSelector(dataIn, thres = thres, inds = inds)

    # Generate spherical harmonics
    Lmax = daRed.l.max()
    YlmX = sphCalc(Lmax, res = res)

    # Reindex to match data (should happen automagically, but not always!)
    # YlmXre = YlmX.reindex_like(daRed)

    # Set rotation angles for LF > MF
    if R is None:
        # Set (x,y,z) projection terms only
        # Nangs = 10
        # pRot = np.linspace(0,180,Nangs)
        # tRot = np.linspace(0,90,Nangs)
        # cRot = np.linspace(0,180,Nangs)
        # eAngs = np.array([pRot, tRot, cRot,])*np.pi/180
        # Convert to quaternions
        # R =  quaternion.from_euler_angles(pRot*np.pi/180, tRot*np.pi/180, cRot*np.pi/180)

        # Euler angles for rotation of LF->MF, set as [0 0 0] for z-pol, [0 pi/2 0] for x-pol, [pi/2 pi/2 0] for y-pol
        pRot = [0, 0, np.pi/2]
        tRot = [0, np.pi/2, np.pi/2]
        cRot = [0, 0, 0]
        eAngs = np.array([pRot, tRot, cRot])   # List form to use later
        Euler = pd.MultiIndex.from_arrays(eAngs, names = ['P','T','C'])

        # Convert to quaternions
        R =  quaternion.from_euler_angles(pRot, tRot, cRot)


    #**************** Calculate MFPADs

    Tlm = []
    Ta = []

    # Loop over pol geoms R
    for n, Rcalc in enumerate(R):
        T = []
        # Loop over mu terms and multiply
        for mu in np.arange(-1,2):

            # Set by element replacement (preserves whole structure)
            # daTemp = daRed.copy()   # Set explicit copy for rotation.
            # daTemp.loc[{'mu':mu}].values = daTemp.loc[{'mu':mu}].values * sf.Wigner_D_element(Rcalc, 1, mu, 0).conj()

            # Issues with reindexing to extra coords at the moment, so reindex and multiply for specific mu only
            # daTemp = daTemp.sel({'mu':mu})
            # YlmXre = YlmX.reindex_like(daTemp)
            # T.append(YlmXre.conj() * daTemp)  # Output full (l,m,mu) expansion

            # Set by looping and selection
            daTemp = daRed.sel({'mu':mu}) * sf.Wigner_D_element(Rcalc, 1, mu, 0).conj()
            YlmXre = YlmX.reindex_like(daTemp)
            T.append(YlmXre.conj() * daTemp)  # Output full (l,m,mu) expansion

        # Concat & sum over symmetries
        Ts = xr.combine_nested([T[0], T[1], T[2]], concat_dim=['LM'])

        # Add dims - currently set for Euler angles only.
        # Can't seem to add a multiindex as a single element, so set dummy coord here and replace below.
        Ts = Ts.expand_dims({'Euler':[n]})  # Set as index
        # Ts = Ts.expand_dims({'p':[eAngs[0,n]], 't':[eAngs[1,n]], 'c':[eAngs[2,n]]})

        Tlm.append(Ts)
        Ta.append(Ts.sum(dim = 'LM'))

    TlmX = xr.combine_nested(Tlm, concat_dim=['Euler'])
    TaX = xr.combine_nested(Ta, concat_dim=['Euler'])

    # Assign Euler angles to dummy dim
    TlmX = TlmX.assign_coords(Euler = Euler)
    TaX = TaX.assign_coords(Euler = Euler)

    return TaX, TlmX  # , Ta, Tlm  # For debug also return lists
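
The expand/concat pattern used for the polarization geometries above can be sketched on its own, assuming an xarray version that accepts DataArrays in combine_nested as the code above relies on; the MultiIndex reassignment is omitted and a plain integer Euler index is kept:

import numpy as np
import xarray as xr

# stand-ins for the per-geometry expansions Ts computed in the loop above
geoms = [xr.DataArray(np.random.rand(4), dims=("LM",)) for _ in range(3)]

# tag each result with a dummy 'Euler' index, stack them, then sum over (l,m)
tagged = [da.expand_dims({"Euler": [n]}) for n, da in enumerate(geoms)]
TlmX = xr.combine_nested(tagged, concat_dim=["Euler"])
TaX = TlmX.sum(dim="LM")
print(TlmX.sizes, TaX.sizes)  # Euler: 3, LM: 4 / Euler: 3
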
Example #10
 #
 # open each zarr file as an xarray dataset
 # correcting the timestamp
 #
 for ds_zarr in zarr_in:
     ds = xr.open_zarr(str(ds_zarr))
     #
     # wrote incorrect times in original files, fix here
     #
     ds['time'] = the_time
     zarr_list.append(ds)
     the_time += 60
 #
 # make a virtual dataset with time as the outer dimension
 #
 zarr_time_ds = xr.combine_nested(zarr_list, 'time')
 #
 # compute the mean and perturbation for timestep 0 and
 # write out as a new zarr file
 #
 time_step = 0
 print(f"finding perturbation for  {zarr_in[time_step]}")
 temp = zarr_time_ds['TABS']
 wvel = zarr_time_ds['W']
 tr01 = zarr_time_ds['TR01']
 mean_temp = temp[time_step, :, :, :].mean(dim=('x', 'y'))
 mean_w = wvel[time_step, :, :, :].mean(dim=('x', 'y'))
 mean_tr = tr01[time_step, :, :, :].mean(dim=('x', 'y'))
 w_prime = wvel - mean_w
 T_prime = temp - mean_temp
 tr_prime = tr01 - mean_tr
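
A compact sketch of the perturbation calculation, with a toy array standing in for the combined zarr fields:

import numpy as np
import xarray as xr

# toy (time, z, y, x) field standing in for zarr_time_ds['TABS']
temp = xr.DataArray(np.random.rand(2, 5, 8, 8), dims=("time", "z", "y", "x"))

time_step = 0
mean_temp = temp[time_step, :, :, :].mean(dim=("x", "y"))  # horizontal mean profile, dims ('z',)
T_prime = temp - mean_temp                                 # broadcasts over time, y and x
print(T_prime.dims)
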
Example #11
 def test_combine_nested_but_need_auto_combine(self):
     objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2], "wall": [0]})]
     with raises_regex(ValueError, "cannot be combined"):
         combine_nested(objs, concat_dim="x")
Example #12
def main(src_dir):
    src_dir = os.path.abspath(src_dir)
    tif_file_list = glob.glob(os.path.join(src_dir, "*.tif"))
    logger.info(f"found {len(tif_file_list)} file(s) to process")

    # test read
    array = imageio.volread(tif_file_list[0])
    shape, dtype = array.shape, array.dtype
    del array
    logger.info(f"array info {shape}, {dtype}")

    # coordinate list
    csv_file_list = glob.glob(os.path.join(src_dir, "*.csv"))
    columns = {
        "grid_x": int,
        "grid_y": int,
        "grid_z": int,
        "coord_x": float,
        "coord_y": float,
        "coord_z": float,
    }
    coords = pd.read_csv(
        csv_file_list[0],
        skiprows=6,
        usecols=list(range(3, 9)),
        names=columns.keys(),
        dtype=columns,
    )

    @delayed
    def volread_np(uri):
        return np.array(imageio.volread(uri))

    def volread_da(uri):
        return da.from_delayed(volread_np(uri), shape, dtype)

    subsets = []
    for src_path, coord in zip(tif_file_list, coords.itertuples(index=False)):
        array = volread_da(src_path)
        coord = coord._asdict()

        array = xr.DataArray(
            array,
            name="raw",
            dims=["z", "y", "x"],
            coords={k: v
                    for k, v in coord.items()},
        )

        # attach tile coordinate
        array = array.expand_dims("tile")
        array = array.assign_coords(
            {k: ("tile", [v])
             for k, v in coord.items()})

        # convert to datasets
        subset = array.to_dataset()
        subsets.append(subset)
    dataset = xr.combine_nested(subsets, concat_dim="tile")

    print(dataset)
    """
    compressor = zarr.Blosc(cname="lz4", clevel=5, shuffle=zarr.blosc.SHUFFLE)
    dataset.to_zarr(
        "_demo_converted.zarr", encoding={"raw": {"compressor": compressor}}
    )
    """

    # generate pyramids
    r = 1
    for _ in range(3):
        r *= 2
        sampler = (slice(None, None, r), ) * 2 + (slice(None), )
        sampler = {k: s for k, s in zip("xyz", sampler)}
        dataset[f"bin{r}"] = dataset["raw"][sampler]
    dataset["mip_xy"] = dataset["raw"].max("z")

    mip_dataset = dataset["mip_xy"][dataset.grid_z == 0]

    tasks = []
    counter = 1
    for iy, image_xy in mip_dataset.groupby("grid_y"):
        for ix, image in image_xy.groupby("grid_x"):
            image = image.squeeze()

            fname = f"tile{counter:03d}_x{ix:03d}_y{iy:03d}.tif"
            counter += 1

            tasks.append((fname, image))

    def imwrite(uri, image):
        imageio.imwrite(uri, image)
        print(uri)

    fname, image = zip(*tasks)
    batch_submit(imwrite, fname, image)
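
The tile-stacking step in main() follows a small pattern that can be sketched with toy data: expand each volume by a 'tile' dimension, attach its grid coordinates along that dimension, and concatenate:

import numpy as np
import xarray as xr

subsets = []
for gx, gy in [(0, 0), (1, 0)]:                        # toy grid positions
    array = xr.DataArray(np.random.rand(4, 5, 6), name="raw", dims=["z", "y", "x"])
    array = array.expand_dims("tile")                  # add the tile dimension
    array = array.assign_coords(grid_x=("tile", [gx]), grid_y=("tile", [gy]))
    subsets.append(array.to_dataset())

dataset = xr.combine_nested(subsets, concat_dim="tile")
print(dataset["raw"].sizes)  # tile: 2, z: 4, y: 5, x: 6
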
Example #13
print (Species_1)
#ammonia sufrace layer
GC_surface_ammonia = [data['SpeciesConc_NH3'].isel(time=0,lev=0) for data in Species_1]
print (GC_surface_ammonia)




os.chdir("/data/uptrop/Projects/DEFRA-NH3/GC/geosfp_eu_naei_iccw/StateMet/2016/")
StateMet = sorted(glob.glob("GEOSChem.StateMet*.nc4"))
print (len(StateMet))

StateMet = StateMet[:]
StateMet_1 = [xr.open_dataset(file) for file in StateMet]
#print ((StateMet_1[0]))
combined = xr.combine_nested(StateMet_1, concat_dim=("time"))
print (combined.indexes)
#monthly mean
StateMet_2=combined.groupby('time.month').mean()
#print (len(StateMet_2))
print (StateMet_2)
#StateMet_3 = list(StateMet_2.groupby('time'))
StateMet_3 = list(StateMet_2.groupby("month", squeeze=False))
print (StateMet_3)

# convert unit for ammonia (dry mol/mol to ug/m3)
surface_AIRDEN = [data['Met_AIRDEN'].isel(time=0,lev=0) for data in StateMet_3] #kg/m3

surface_AIRNUMDEN_a = np.asarray(surface_AIRDEN)/MW_AIR #mol/m3
surface_AIRNUMDEN_b = surface_AIRNUMDEN_a*AVOGADRO # unit molec air/m3
surface_AIRNUMDEN = surface_AIRNUMDEN_b/1e6 #unit molec air/cm3
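
The air number density conversion can be checked with one worked number; MW_AIR and AVOGADRO are assumed here to be the dry-air molar mass in kg/mol and Avogadro's constant:

AVOGADRO = 6.022e23      # molecules per mole (assumed value)
MW_AIR = 28.9644e-3      # kg of dry air per mole (assumed value)

rho_air = 1.2                                 # example surface air density, kg/m3
n_air = rho_air / MW_AIR * AVOGADRO / 1e6     # mol/m3 -> molec/m3 -> molec/cm3
print(f"{n_air:.3e} molec air/cm3")           # roughly 2.5e19 at the surface
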
Example #14
 def test_manual_combine_but_need_auto_combine(self):
     objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2], 'wall': [0]})]
     with raises_regex(ValueError, 'cannot be combined'):
         combine_nested(objs, concat_dim='x')
Example #15
    if session.algorithm == 'NDVI':
        V_INDEX_d = [dask.delayed(f.NDVI) for f in AOI_d]
    elif session.algorithm == 'EVI':
        V_INDEX_d = [dask.delayed(EVI)(f) for f in AOI_d]

    combined = zip(V_INDEX_d, HSV_d)
    GVI_index = [
        dask.delayed(GVI)(i, **{
            'limits': session.limits,
            'algorithm': session.algorithm
        }) for i in combined
    ]

    datasets = dask.compute(GVI_index)  # get a list of xarray.Datasets
    da_vegetation = xr.combine_nested(datasets[0], concat_dim=['time'])
    da_vegetation = da_vegetation.chunk({
        'time': -1,
        'latitude': 1000,
        'longitude': 1000
    })

    GVDM = xr.apply_ufunc(
        decades,
        da_vegetation,
        input_core_dims=[['time']],
        exclude_dims={
            'time',
        },
        dask='parallelized',
        dask_gufunc_kwargs={'allow_rechunk': True},
Example #16
 def test_combine_coords_join_exact(self):
     objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*"):
         combine_nested(objs, concat_dim="x", join="exact")
Example #17
    def _load(self, filenames=None):
        ''' Load the set of files into a single XArray structure. '''

        all_leads = [] if filenames is None else [self.contents]
        filenames = self.filenames if filenames is None else filenames

        # 0h and 1h accumulated forecast variables are named differently than
        # the rest of the forecast hours. Rename those accumulated variables if
        # needed.
        for fcst_type in ['01fcst', 'free_fcst']:

            if filenames.get(fcst_type):
                for filename in filenames.get(fcst_type):
                    print(f'Loading grib2 file: {fcst_type}, {filename}')

                # Rename variables to match free forecast variables
                dataset = xr.open_mfdataset(
                    filenames[fcst_type],
                    **self.open_kwargs,
                )

                renaming = self.free_fcst_names(dataset, fcst_type)
                if renaming and self.model not in ['hrrre', 'rrfse']:
                    print(f'RENAMING VARIABLES:')
                    for old_name, new_name in renaming.items():
                        print(f'  {old_name:>30s}  -> {new_name}')
                    dataset = dataset.rename_vars(renaming)

                if len(all_leads) == 1:
                    # Check that specific variables exist in the xarray that is
                    # already loaded (presumably 0hr), and add them if they
                    # don't. This implementation is relying on pointers to
                    # update "in place"
                    og_ds = all_leads[0]
                    bad_vars = [
                        'APCP_P8_L1_{grid}_acc',
                        'ACPCP_P8_L1_{grid}_acc',
                        'FROZR_P8_L1_{grid}_acc',
                        'NCPCP_P8_L1_{grid}_acc',
                        'WEASD_P8_L1_{grid}_acc',
                    ]
                    bad_vars = [v.format(grid=self.grid_suffix) for v in \
                            bad_vars]
                    for bad_var in bad_vars:
                        # Check to see if the bad variable is in the current
                        # dataset and NOT in the original dataset.
                        if bad_var not in og_ds.variables and \
                            dataset.get(bad_var) is not None:
                            print(f'Adding {bad_var} to og ds')
                            # Duplicate the accumulated variable with the
                            # required name
                            og_ds[bad_var] = og_ds.get(f'{bad_var}1h')
                all_leads.append(dataset)

        ret = xr.combine_nested(
            all_leads,
            compat='override',
            concat_dim=list(self.coord_dims.keys())[0],
            coords='minimal',
            data_vars='all',
        )
        return ret
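
The final combine in _load reduces to this sketch with toy forecast leads; 'lead' stands in for list(self.coord_dims.keys())[0]:

import numpy as np
import xarray as xr

# two toy forecast leads sharing the same grid
lead0 = xr.Dataset({"TMP": (("lead", "y", "x"), np.random.rand(1, 3, 4))})
lead1 = xr.Dataset({"TMP": (("lead", "y", "x"), np.random.rand(1, 3, 4))})

ret = xr.combine_nested(
    [lead0, lead1],
    compat="override",   # skip equality checks; take conflicting values from the first dataset
    concat_dim="lead",
    coords="minimal",
    data_vars="all",
)
print(ret.sizes)  # lead: 2, y: 3, x: 4
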
Example #18
def truncate(ds: xr.Dataset, max_sizes: Mapping[Hashable, int]) -> xr.Dataset:
    """Truncate a dataset along two dimensions into a form suitable for display.

    Truncation involves taking four rectangles from each corner of the dataset array
    (or arrays) and combining them into a smaller dataset array (or arrays).

    Parameters
    ----------
    ds
        The dataset to be truncated.
    max_sizes : Mapping[Hashable, int]
        A dict with keys matching dimensions and integer values indicating
        the maximum size of the dimension after truncation.

    Returns
    -------
    Dataset
        A truncated dataset.

    Warnings
    --------
    A maximum size of `n` may result in the array having size `n + 2` (and not `n`).
    The reason for this is so that pandas can be used to display the array as a table,
    and correctly truncate rows or columns (shown as ellipses ...).
    """

    if len(max_sizes) != 2:
        raise ValueError("Truncation is only supported for two dimensions")

    dims = list(max_sizes.keys())
    max_dim = max_sizes[dims[0]], max_sizes[dims[1]]
    n_dim = ds.sizes[dims[0]], ds.sizes[dims[1]]

    if n_dim[0] <= max_dim[0] + 2 and n_dim[1] <= max_dim[1] + 2:
        # No truncation required
        return ds

    if n_dim[0] <= max_dim[0] + 1:
        # Truncate dim1 only
        m_dim = n_dim[0], max_dim[1] // 2 + 1
        rows = [[(0, 0), (0, m_dim[1])]]
    elif n_dim[1] <= max_dim[1] + 1:
        # Truncate dim0 only
        m_dim = max_dim[0] // 2 + 1, n_dim[1]
        rows = [[(0, 0)], [(m_dim[0], 0)]]
    else:
        # Truncate both dimensions
        m_dim = max_dim[0] // 2 + 1, max_dim[1] // 2 + 1
        rows = [[(0, 0), (0, m_dim[1])], [(m_dim[0], 0), (m_dim[0], m_dim[1])]]

    limits = {dims[0]: m_dim[0], dims[1]: m_dim[1]}
    slices = {k: slice(v) for k, v in limits.items()}
    ds_abbr: xr.Dataset = xr.combine_nested(  # type: ignore[no-untyped-call]
        [
            [
                # Roll all of these simultaneously along with any indexes/coords
                # and then clip them using the same slice for each corner
                ds.roll(dict(zip(limits, roll)), roll_coords=True).isel(**slices)
                for roll in row
            ]
            for row in rows
        ],
        concat_dim=limits.keys(),
    )

    assert ds_abbr.sizes[dims[0]] <= max_dim[0] + 2
    assert ds_abbr.sizes[dims[1]] <= max_dim[1] + 2

    return ds_abbr
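
A hedged usage sketch of truncate, assuming the function above is importable; with a 10 x 10 dataset and maximum sizes of 4, each corner contributes a 3 x 3 block:

import numpy as np
import xarray as xr

ds = xr.Dataset({"calls": (("variants", "samples"), np.arange(100).reshape(10, 10))})
small = truncate(ds, {"variants": 4, "samples": 4})
# each dimension is at most max_size + 2, which lets pandas render the ellipses
print(small.sizes)  # variants: 6, samples: 6
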
Example #19
def combine_echodata(echodatas: List[EchoData],
                     combine_attrs="override") -> EchoData:
    """
    Combines multiple `EchoData` objects into a single `EchoData` object.

    Parameters
    ----------
    echodatas: List[EchoData]
        The list of `EchoData` objects to be combined.
    combine_attrs: { "override", "drop", "identical", "no_conflicts", "overwrite_conflicts" }
        String indicating how to combine attrs of the `EchoData` objects being merged.
        This parameter matches the identically named xarray parameter
        (see https://xarray.pydata.org/en/latest/generated/xarray.combine_nested.html)
        with the exception of the "overwrite_conflicts" value.

        * "override": Default. skip comparing and copy attrs from the first `EchoData`
          object to the result.
        * "drop": empty attrs on returned `EchoData` object.
        * "identical": all attrs must be the same on every object.
        * "no_conflicts": attrs from all objects are combined,
          any that have the same name must also have the same value.
        * "overwrite_conflicts": attrs from all `EchoData` objects are combined,
          attrs with conflicting keys will be overwritten by later `EchoData` objects.

    Returns
    -------
    EchoData
        An `EchoData` object with all of the data from the input `EchoData` objects combined.

    Raises
    ------
    ValueError
        If `echodatas` contains `EchoData` objects with different or `None` `sonar_model` values
        (i.e., all `EchoData` objects must have the same non-None `sonar_model` value).
    ValueError
        If EchoData objects have conflicting source file names.

    Warns
    -----
    UserWarning
        If the `sonar_model` of the input `EchoData` objects is `"EK60"` and any `EchoData` objects
        have non-monotonically increasing `ping_time`, `time1` or `time2` values,
        the corresponding values in the output `EchoData` object will be increased starting at the
        timestamp where the reversal occurs such that all values in the output are monotonically
        increasing. Additionally, the original `ping_time`, `time1` or `time2` values
        will be stored in the `Provenance` group, although this behavior may change in future
        versions.

    Warnings
    --------
    Changes in parameters between `EchoData` objects are not currently checked;
    however, they may raise an error in future versions.

    Notes
    -----
    * `EchoData` objects are combined by combining their groups individually.
    * Attributes from all groups before the combination will be stored in the provenance group,
      although this behavior may change in future versions.
    * The `source_file` and `converted_raw_path` attributes will be copied from the first
      `EchoData` object in the given list, but this may change in future versions.

    Examples
    --------
    >>> ed1 = echopype.open_converted("file1.nc")
    >>> ed2 = echopype.open_converted("file2.zarr")
    >>> combined = echopype.combine_echodata([ed1, ed2])
    """

    tree_dict = {}
    result = EchoData()
    if len(echodatas) == 0:
        return result
    result.source_file = echodatas[0].source_file
    result.converted_raw_path = echodatas[0].converted_raw_path

    sonar_model = None
    for echodata in echodatas:
        if echodata.sonar_model is None:
            raise ValueError(
                "all EchoData objects must have non-None sonar_model values")
        elif sonar_model is None:
            sonar_model = echodata.sonar_model
        elif echodata.sonar_model != sonar_model:
            raise ValueError(
                "all EchoData objects must have the same sonar_model value")

    # ping time before reversal correction
    old_ping_time = None
    # ping time after reversal correction
    new_ping_time = None
    # location time before reversal correction
    old_time1 = None
    # location time after reversal correction
    new_time1 = None
    # mru time before reversal correction
    old_time2 = None
    # mru time after reversal correction
    new_time2 = None
    # time3 before reversal correction
    old_time3 = None
    # time3 after reversal correction
    new_time3 = None

    # all attributes before combination
    # { group1: [echodata1 attrs, echodata2 attrs, ...], ... }
    old_attrs: Dict[str, List[Dict[str, Any]]] = dict()

    for group, value in EchoData.group_map.items():
        group_datasets = [
            getattr(echodata, group) for echodata in echodatas
            if getattr(echodata, group) is not None
        ]
        if group in ("top", "sonar"):
            combined_group = getattr(echodatas[0], group)
        elif group == "provenance":
            combined_group = assemble_combined_provenance([
                echodata.source_file if echodata.source_file is not None else
                echodata.converted_raw_path for echodata in echodatas
            ])
        else:
            if len(group_datasets) == 0:
                setattr(result, group, None)
                continue

            concat_dim = SONAR_MODELS[sonar_model]["concat_dims"].get(
                group, SONAR_MODELS[sonar_model]["concat_dims"]["default"])
            concat_data_vars = SONAR_MODELS[sonar_model][
                "concat_data_vars"].get(
                    group,
                    SONAR_MODELS[sonar_model]["concat_data_vars"]["default"])
            combined_group = xr.combine_nested(
                group_datasets,
                [concat_dim],
                data_vars=concat_data_vars,
                coords="minimal",
                combine_attrs="drop"
                if combine_attrs == "overwrite_conflicts" else combine_attrs,
            )
            if combine_attrs == "overwrite_conflicts":
                combined_group.attrs.update(union_attrs(group_datasets))

            if group == "beam":
                if sonar_model == "EK80":
                    combined_group[
                        "transceiver_software_version"] = combined_group[
                            "transceiver_software_version"].astype("<U10")
                    combined_group["channel"] = combined_group[
                        "channel"].astype("<U50")
                elif sonar_model == "EK60":
                    combined_group["gpt_software_version"] = combined_group[
                        "gpt_software_version"].astype("<U10")

                    # TODO: investigate further why we need to do .astype("<U50")
                    combined_group["channel"] = combined_group[
                        "channel"].astype("<U50")

            if sonar_model != "AD2CP":

                combined_group, old_ping_time, new_ping_time = check_and_correct_reversed_time(
                    combined_group, old_ping_time, new_ping_time, "ping_time",
                    sonar_model)

                if group != "nmea":
                    combined_group, old_time1, new_time1 = check_and_correct_reversed_time(
                        combined_group, old_time1, new_time1, "time1",
                        sonar_model)

                combined_group, old_time2, new_time2 = check_and_correct_reversed_time(
                    combined_group, old_time2, new_time2, "time2", sonar_model)

                combined_group, old_time3, new_time3 = check_and_correct_reversed_time(
                    combined_group, old_time3, new_time3, "time3", sonar_model)

        if len(group_datasets) > 1:
            old_attrs[group] = [
                group_dataset.attrs for group_dataset in group_datasets
            ]
        if combined_group is not None:
            # xarray inserts this dimension when concatenating along multiple dimensions
            combined_group = combined_group.drop_dims("concat_dim",
                                                      errors="ignore")

        combined_group = set_encodings(combined_group)
        if value["ep_group"] is None:
            tree_dict["root"] = combined_group
        else:
            tree_dict[value["ep_group"]] = combined_group

    # Set tree into echodata object
    result._set_tree(tree=DataTree.from_dict(tree_dict))
    result._load_tree()

    # save ping time before reversal correction
    if old_ping_time is not None:
        result.provenance["old_ping_time"] = old_ping_time
        result.provenance.attrs["reversed_ping_times"] = 1
    # save location time before reversal correction
    if old_time1 is not None:
        result.provenance["old_time1"] = old_time1
        result.provenance.attrs["reversed_ping_times"] = 1
    # save mru time before reversal correction
    if old_time2 is not None:
        result.provenance["old_time2"] = old_time2
        result.provenance.attrs["reversed_ping_times"] = 1
    # save time3 before reversal correction
    if old_time3 is not None:
        result.provenance["old_time3"] = old_time3
        result.provenance.attrs["reversed_ping_times"] = 1
    # TODO: possible parameter to disable original attributes and original ping_time storage
    # in provenance group?
    # save attrs from before combination
    for group in old_attrs:
        all_group_attrs = set()
        for group_attrs in old_attrs[group]:
            for attr in group_attrs:
                all_group_attrs.add(attr)
        echodata_filenames = []
        for ed in echodatas:
            if ed.source_file is not None:
                filepath = ed.source_file
            elif ed.converted_raw_path is not None:
                filepath = ed.converted_raw_path
            else:
                # unreachable
                raise ValueError("EchoData object does not have a file path")
            filename = Path(filepath).name
            if filename in echodata_filenames:
                raise ValueError("EchoData objects have conflicting filenames")
            echodata_filenames.append(filename)
        attrs = xr.DataArray(
            [[group_attrs.get(attr) for attr in all_group_attrs]
             for group_attrs in old_attrs[group]],
            coords={
                "echodata_filename": echodata_filenames,
                f"{group}_attr_key": list(all_group_attrs),
            },
            dims=["echodata_filename", f"{group}_attr_key"],
        )
        result.provenance = result.provenance.assign({f"{group}_attrs": attrs})

    # Add back sonar model
    result.sonar_model = sonar_model

    return result
Example #20
def get_averaged_ms(ms_name,
                    tbin=None,
                    cbin=None,
                    chunks=None,
                    taql_where='',
                    columns=None,
                    chan=None,
                    corr=None,
                    data_col=None,
                    group_cols=None,
                    iter_axis=None):
    """ Performs MS averaging.
    Before averaging is performed, data selection has already been performed
    during the MS acquisition process. TAQL (if available) is used to
    perform selections for FIELD, SPW/DDID & SCAN. This is the first round of
    selection. The second round involves selections over channels and
    correlations. This is done via a slicer. With the exception of corr
    selection, all the other selections are done before averaging. This is
    done because the averager requires 3-dimensional data.

    MS is then grouped by DDID, FIELD_ID & SCAN_NUMBER and fed into
    :meth:`average_main` which actually performs the averaging.

    This function returns to :meth:`ragavi.visibilities.get_ms` and its output
    is therefore grouped and column-selected similarly


    Parameters
    ----------
    ms_name : :obj:`str`
        Name of the input MS
    tbin : :obj:`float`
        Time bin in seconds
    cbin : :obj:`int`
        Number of channels to bin together
    chunks : :obj:`dict`
        Size of resulting MS chunks.
    taql_where: :obj:`str`
        TAQL clause to pass to xarrayms
    columns: :obj:`list`
        Columns to be present in the data
    chan : :obj:`slicer`
        A slicer to select the channels to be present in the dataset
    corr : :obj:`slicer` or :obj:`int`
        Correlation index of slice to be present in the dataset
    data_col : :obj:`str`
        Column containing data to be used
    group_cols: :obj:`list`
        List containing columns by which to group the data
    iter_axis: :obj:`str`
        Axis over which iteration is done

    Returns
    -------
    x_dataset: :obj:`list`
        List of :obj:`xarray.Dataset` containing averaged MS. The MSs are split
        by spectral windows and grouped depending on the type of plots.

    """

    if chunks is None:
        chunks = dict(row=10000)

    # these are the defaults in averager
    if tbin is None:
        tbin = 1
    if cbin is None:
        cbin = 1

    # ensure that these are in the selected columns
    for _ in [
            "TIME", "ANTENNA1", "ANTENNA2", "INTERVAL", "FLAG", "FLAG_ROW",
            data_col
    ]:
        if _ not in columns:
            columns.append(_)

    # must be grouped this way because of time averaging
    ms_obj = xm.xds_from_ms(
        ms_name,
        group_cols=["DATA_DESC_ID", "FIELD_ID", "SCAN_NUMBER"],
        columns=columns,
        taql_where=taql_where)

    # some channels have been selected
    # corr selection is performed after averaging!!
    if chan is not None:
        ms_obj = [_.sel(chan=chan) for _ in ms_obj]

    logger.info("Averaging MAIN table")

    # perform averaging to the MS
    avg_mss = average_main(main_ds=ms_obj,
                           time_bin_secs=tbin,
                           chan_bin_size=cbin,
                           group_row_chunks=100000,
                           respect_flag_row=False,
                           sel_cols=columns,
                           viscolumn=data_col)
    n_ams = len(avg_mss)

    # writes_ms = xm.xds_to_table(avg_mss, "tesxt", "ALL")
    logger.info("Creating averaged xarray Dataset")

    x_datasets = []
    for _a, ams in enumerate(avg_mss):
        ams = ams.compute()
        logger.info(f"Averaging {_a+1} / {n_ams}")

        datas = {
            k: (v.dims, v.data, v.attrs)
            for k, v in ams.data_vars.items() if k != "FLAG_CATEGORY"
        }

        new_ds = xr.Dataset(datas, attrs=ams.attrs, coords=ams.coords)
        new_ds = new_ds.chunk(chunks)

        x_datasets.append(new_ds)

    # data will always be grouped by SPW unless iterating over antenna
    # the most grouping that will occur will be between two columns
    all_grps = []

    if len(group_cols) == 0:
        # return a single dataset
        subs = xr.combine_nested(x_datasets,
                                 concat_dim="row",
                                 compat="no_conflicts",
                                 data_vars="all",
                                 coords="different",
                                 join="outer")
        subs.attrs = {}
        subs = subs.chunk(chunks)
        all_grps.append(subs)

    elif (set(group_cols) <= {"DATA_DESC_ID", "ANTENNA1", "ANTENNA2"}
          or iter_axis == "antenna"):
        uniques = np.unique([_.attrs["DATA_DESC_ID"] for _ in x_datasets])
        uants = np.arange(vu.get_antennas(ms_name).size)

        for _d in uniques:
            subs = []
            for _ in x_datasets:
                if _.attrs["DATA_DESC_ID"] == _d:
                    subs.append(_)
            subs = xr.combine_nested(subs,
                                     concat_dim="row",
                                     compat="no_conflicts",
                                     data_vars="all",
                                     coords="different",
                                     join="outer")
            subs.attrs = {"DATA_DESC_ID": _d}
            subs = subs.chunk(chunks)

            if {"ANTENNA1", "ANTENNA2"} <= set(group_cols):
                u_bl = combinations(uants, 2)
                for p, q in u_bl:
                    n_subs = subs.where(
                        (subs.ANTENNA1 == p) & (subs.ANTENNA2 == q), drop=True)
                    n_subs.attrs = {
                        "DATA_DESC_ID": _d,
                        "ANTENNA1": p,
                        "ANTENNA2": q
                    }
                    all_grps.append(n_subs)
            elif "ANTENNA1" in group_cols:
                for p in uants[:-1]:
                    n_subs = subs.where((subs.ANTENNA1 == p), drop=True)
                    n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA1": p}
                    all_grps.append(n_subs)
            elif "ANTENNA2" in group_cols:
                for q in uants[:-1] + 1:
                    n_subs = subs.where((subs.ANTENNA2 == q), drop=True)
                    n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA2": q}
                    all_grps.append(n_subs)
            elif iter_axis == "antenna":
                for p in uants:
                    n_subs = subs.where(
                        (subs.ANTENNA1 == p) | (subs.ANTENNA2 == p), drop=True)
                    n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA": p}
                    all_grps.append(n_subs)
            else:
                all_grps.append(subs)

    elif set(group_cols) <= {"DATA_DESC_ID", "FIELD_ID", "SCAN_NUMBER"}:
        grps = {}
        # must be ddid + something else
        # if it is something other than fid and scan e.g
        # by default group by ddid
        for grp in group_cols:
            uniques = np.unique([_.attrs[grp] for _ in x_datasets])
            grps[grp] = uniques
            # grps.append(uniques)
        for com in product(*grps.values()):
            subs = []
            natt = {k: v for k, v in zip(group_cols, com)}
            for _ in x_datasets:
                if set(natt.items()) <= set(_.attrs.items()):
                    subs.append(_)
            subs = xr.combine_nested(subs,
                                     concat_dim="row",
                                     compat="no_conflicts",
                                     data_vars="all",
                                     coords="different",
                                     join="outer")
            subs.attrs = natt
            subs = subs.chunk(chunks)
            all_grps.append(subs)

    # select a corr
    if corr is not None:
        all_grps = [_.sel(corr=corr) for _ in all_grps]

    logger.info("Averaging completed.")

    return all_grps
Example #21
projection = ccrs.PlateCarree()

gs1 = gridspec.GridSpec(3, 1)
gs1.update(wspace=0, hspace=0, left=0.01, right=0.99, top=0.99, bottom=0.01)

STRETCH_FACTOR = 1.0001
TARGET_LAT = 0.0
TARGET_LON = 350.0
grid = sg.grids.StretchedGrid(48, STRETCH_FACTOR, TARGET_LAT, TARGET_LON)
with suppress_stdout():
    # Load data
    da = xr.combine_nested([
        xr.open_dataset(path)['SpeciesConc_Rn222'] for path in [
            f'OutputDir/GCHP.SpeciesConc.20160101_0030z.nc4',
            f'OutputDir/GCHP.SpeciesConc.20160101_1230z.nc4',
            f'OutputDir/GCHP.SpeciesConc.20160102_0030z.nc4',
        ]
    ],
                           concat_dim='time')
    ds_wind = [
        xr.open_dataset(path) for path in [
            f'OutputDir/GCHP.StateMet_avg.20160101_0030z.nc4',
            f'OutputDir/GCHP.StateMet_avg.20160101_1230z.nc4',
            f'OutputDir/GCHP.StateMet_avg.20160102_0030z.nc4',
        ]
    ]
    u_wind = xr.combine_nested([ds['Met_U'] for ds in ds_wind],
                               concat_dim='time')
    v_wind = xr.combine_nested([ds['Met_V'] for ds in ds_wind],
                               concat_dim='time')
for time in range(3):
Example #22
 def test_empty_input(self):
     assert_identical(Dataset(), combine_nested([], concat_dim="x"))
Example #23
 def test_combine_nested_join_exact(self):
     objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     with raises_regex(ValueError, "indexes along dimension"):
         combine_nested(objs, concat_dim="x", join="exact")
Example #24
 def test_combine_coords_join(self, join, expected):
     objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     actual = combine_nested(objs, concat_dim="x", join=join)
     assert_identical(expected, actual)
Example #25
edate = '20171203'
files1 = glob.glob(path_ccn)
files1.sort()
files1 = [
    f for f in files1
    if f.split('.')[-3] >= sdate and f.split('.')[-3] <= edate
]
#file = files+files1

ccn_colavg1 = arm.read_netcdf(files1)
ccn_colavg1 = ccn_colavg1.resample(time='1h').nearest()
con1 = ccn_colavg1['N_CCN']
qc1 = ccn_colavg1['qc_N_CCN']
#con_full = xr.concat([con,con1],dim =index )
con_full = xr.combine_nested(
    [con, con1],
    concat_dim=['time'])[:, 3]  # supersaturation_setpoint  float32 0.5
ccn_qc_full = xr.combine_nested(
    [qc, qc1], concat_dim=['time'])[:, 3]  # supersaturation_setpoint  float32 0.5
time_ccn = con_full.time

#%%
#qing_exhau = netCDF4.Dataset('/Users/qingn/20171030201180324qing_flag_00.cdf')
#qing_exhaust = qing_exhau['exhaust_flag']
#environ_path = '/Users/qingn/Downloads/drive-download-20191125T073459Z-001/MARCUS_V1_VAP_20171029.cdf'
# =============================================================================
# environ_path1 = '/Users/qingn/Downloads/MARCUS VAP/MARCUS_*.cdf'\
#
# files_env_July = glob.glob(environ_path1)
# files_env_July.sort()
Example #26
def open_mfdataset(
    paths,
    chunks=None,
    concat_dim="time",
    compat="no_conflicts",
    preprocess=None,
    engine=None,
    lock=None,
    data_vars="all",
    coords="different",
    combine="nested",
    autoclose=None,
    parallel=False,
    join="outer",
    attrs_file=None,
    **kwargs,
):
    """Helper function for opening multiple files as an xarray_ dataset.
    Adapted from upstream implementation_. See docs_ for usage.

    .. todo::

            To be removed when a backend entrypoint_ is implemented.

    .. _implementation: https://github.com/pydata/xarray/blob/484d1ce5ff8969b6ca6fa942b344379725f33b9c/xarray/backends/api.py#L726
    .. _docs: https://xarray.pydata.org/en/v0.15.1/generated/xarray.open_mfdataset.html
    .. _entrypoint: https://github.com/pydata/xarray/pull/3166

    """
    if isinstance(paths, str):
        paths = sorted(glob(paths))
    else:
        paths = [str(p) if isinstance(p, Path) else p for p in paths]

    if not paths:
        raise OSError("no files to open")

    # If combine='by_coords' then this is unnecessary, but quick.
    # If combine='nested' then this creates a flat list which is easier to
    # iterate over, while saving the originally-supplied structure as "ids"
    if combine == "nested":
        if isinstance(concat_dim, (str, xr.DataArray)) or concat_dim is None:
            concat_dim = [concat_dim]

    open_kwargs = dict()

    if parallel:
        import dask

        # wrap the open_dataset, getattr, and preprocess with delayed
        open_ = dask.delayed(open_dataset)
        if preprocess is not None:
            preprocess = dask.delayed(preprocess)
    else:
        open_ = open_dataset

    datasets = [open_(p, **open_kwargs) for p in paths]
    if preprocess is not None:
        datasets = [preprocess(ds) for ds in datasets]

    if parallel:
        # calling compute here will return the datasets
        # the underlying datasets will still be stored as dask arrays
        (datasets,) = dask.compute(datasets)

    # Combine all datasets, closing them in case of a ValueError
    try:
        if combine == "nested":
            # Combined nested list by successive concat and merge operations
            # along each dimension, using structure given by "ids"
            combined = xr.combine_nested(
                datasets,
                concat_dim=concat_dim,
                compat=compat,
                data_vars=data_vars,
                coords=coords,
                join=join,
            )
        elif combine == "by_coords":
            # Redo ordering from coordinates, ignoring how they were ordered
            # previously
            combined = xr.combine_by_coords(
                datasets, compat=compat, data_vars=data_vars, coords=coords, join=join
            )
        else:
            raise ValueError(
                "{} is an invalid option for the keyword argument"
                " ``combine``".format(combine)
            )
    except ValueError:
        for ds in datasets:
            ds.close()
        raise

    # read global attributes from the attrs_file or from the first dataset
    if attrs_file is not None:
        if isinstance(attrs_file, Path):
            attrs_file = str(attrs_file)
        combined.attrs = datasets[paths.index(attrs_file)].attrs
    else:
        combined.attrs = datasets[0].attrs

    return combined
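
A hedged usage sketch of this helper; the glob pattern is illustrative, and with combine='nested' the files are concatenated in glob-sorted order:

# hypothetical file pattern; files are assumed to differ only along 'time'
ds = open_mfdataset("data/run_*.nc", concat_dim="time", combine="nested")
print(ds)
ds.close()
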
Example #27
 def test_manual_concat_too_many_dims_at_once(self):
     objs = [Dataset({'x': [0], 'y': [1]}), Dataset({'y': [0], 'x': [1]})]
     with pytest.raises(ValueError, match="not equal across datasets"):
         combine_nested(objs, concat_dim='x', coords='minimal')
Example #28
    an integer and returning that number
    """
    the_match = find_hour.match(str(the_file))
    return int(the_match.group(1))


if __name__ == "__main__":
    all_files.sort(key=sort_hour)
    xarray_files = []
    for item in all_files:
        with Dataset(str(item)) as nc_file:
            the_time = nc_file.variables['time'][...]
            print(datetime.fromtimestamp(the_time, tz=utc))
            ds = xr.open_dataset(item)
            xarray_files.append(ds)
    ds_big = xr.combine_nested(xarray_files, 'time')
    time_average = ds_big.mean('time')
    #
    # time_average.data_vars
    # time_average.coords
    varnames = list(ds_big.variables.keys())
    #
    #
    # create an xarray out of these files
    #
    vel_vals = [
        'VVEL_200mb', 'VVEL_250mb', 'VVEL_500mb', 'VVEL_700mb', 'VVEL_925mb',
        'VVEL_1000mb'
    ]
    vel_dict = {}
    for key in vel_vals:
Example #29
def read_clear_allsky_pairs_MWI(files_clearsky):

    dict_ici = {"ici_channels": "channels", "ici_stokes_dim": "stokes_dim"}
    dict_mwi = {"mwi_channels": "channels", "mwi_stokes_dim": "stokes_dim"}
    first_iteration = True
    for file_clearsky in files_clearsky[:]:
        file_allsky = file_clearsky.replace('_clearsky.nc', '.nc')

        file_clearsky_mwi = file_clearsky.replace('ICI', 'MWI')
        file_allsky_mwi = file_allsky.replace('ICI', 'MWI')

        files = [file_allsky, file_clearsky_mwi, file_allsky_mwi]

        f_exist = [f for f in files if os.path.isfile(f)]
        if len(f_exist) == 3:

            #        if os.path.isfile(file_allsky):
            # check if both files exist
            #        print (file_allsky)
            y = xarray.open_dataset(file_allsky)
            y_ici_allsky = y.y_ici

            y = xarray.open_dataset(file_clearsky)
            #        print(file_clearsky)
            y_ici_clearsky = y.y_ici

            y = xarray.open_dataset(file_allsky_mwi)
            y_mwi_allsky = y.y_mwi

            y = xarray.open_dataset(file_clearsky_mwi)
            #        print(file_clearsky)
            y_mwi_clearsky = y.y_mwi

            allsky = y_ici_allsky.shape[0]
            clearsky = y_ici_clearsky.shape[0]
            allsky_mwi = y_mwi_allsky.shape[0]
            clearsky_mwi = y_mwi_clearsky.shape[0]

            cases = min(allsky, clearsky, allsky_mwi, clearsky_mwi)

            y_ici_allsky = y_ici_allsky[:cases, :]
            y_ici_clearsky = y_ici_clearsky[:cases, :]
            y_mwi_allsky = y_mwi_allsky[:cases, :]
            y_mwi_clearsky = y_mwi_clearsky[:cases, :]

            y_ici_allsky = y_ici_allsky.rename(dict_ici)
            y_ici_clearsky = y_ici_clearsky.rename(dict_ici)
            y_mwi_allsky = y_mwi_allsky.rename(dict_mwi)
            y_mwi_clearsky = y_mwi_clearsky.rename(dict_mwi)

            y_ici_allsky = xarray.combine_nested([y_ici_allsky, y_mwi_allsky],
                                                 concat_dim=["channels"])

            y_ici_clearsky = xarray.combine_nested(
                [y_ici_clearsky, y_mwi_clearsky], concat_dim=["channels"])

            if first_iteration:
                # initialise the xarray DataArray
                y_ici_cs = y_ici_clearsky
                y_ici_as = y_ici_allsky
                first_iteration = False
            else:
                y_ici_cs = xarray.concat([y_ici_cs, y_ici_clearsky],
                                         dim='cases')
                y_ici_as = xarray.concat([y_ici_as, y_ici_allsky], dim='cases')
    print(y_ici_cs.shape)
    print(y_ici_as.shape)

    return y_ici_cs, y_ici_as
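
The channel-stacking step above follows a simple pattern: rename the instrument-specific channel dimensions to a shared name, then concatenate along it. A minimal sketch with unnamed toy DataArrays, assuming an xarray version that accepts DataArrays in combine_nested as the code above does:

import numpy as np
import xarray as xr

y_ici = xr.DataArray(np.random.rand(10, 4), dims=("cases", "ici_channels"))
y_mwi = xr.DataArray(np.random.rand(10, 3), dims=("cases", "mwi_channels"))

# rename to a shared 'channels' dimension, then stack the channels end to end
y_all = xr.combine_nested(
    [y_ici.rename({"ici_channels": "channels"}),
     y_mwi.rename({"mwi_channels": "channels"})],
    concat_dim=["channels"],
)
print(y_all.shape)  # (10, 7)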