def download_all_nc(turl, folder):
    # Resolve the THREDDS catalog into direct download urls, then fetch the
    # files concurrently with gevent, collecting the resulting local paths.
    nc_urls = get_nc_urls(turl, download=True)
    jobs = [gevent.spawn(download_nc, url, folder) for url in nc_urls]
    gevent.joinall(jobs, timeout=300)
    ncfiles = [job.value for job in jobs]
    return ncfiles
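# A hedged usage sketch for download_all_nc (not from the original source):
# the catalog url and destination folder below are hypothetical placeholders.
# Assumes gevent and the helpers above are importable as in this module.
def _download_all_nc_example():
    catalog_url = (
        "https://opendap.oceanobservatories.org/thredds/catalog/ooi/"
        "example-user/example-request/catalog.html"
    )  # hypothetical catalog url
    ncfiles = download_all_nc(catalog_url, "/tmp/ooi_nc")  # hypothetical folder
    print(f"Downloaded {len(ncfiles)} netCDF files")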
def fetch_xr(params, **kwargs):
    turl, ref_degs = params
    datasets = get_nc_urls(turl)
    # Only include instruments where the reference designator appears twice
    # (i.e. it was in the original filter): once in the request directory and
    # once in the netCDF filename.
    filt_ds = list(
        filter(lambda x: any(x.count(ref) > 1 for ref in ref_degs), datasets)
    )
    return xr.open_mfdataset(
        filt_ds, preprocess=preprocess_ds, decode_times=False, **kwargs
    )
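# A minimal, self-contained sketch of the reference-designator filter above,
# using hypothetical urls: the designator shows up once in the request
# directory and once in the filename, hence the count(ref) > 1 test.
def _filter_example():
    ref_degs = ["RS03AXPS-PC03A-4A-CTDPFA303"]  # hypothetical designator
    datasets = [
        # Designator appears twice (directory + filename) -> kept.
        "https://opendap.oceanobservatories.org/thredds/dodsC/ooi/"
        "user/20180606T232135-RS03AXPS-PC03A-4A-CTDPFA303-streamed/"
        "deployment0004_RS03AXPS-PC03A-4A-CTDPFA303-streamed.nc",
        # Designator appears only once (directory) -> filtered out.
        "https://opendap.oceanobservatories.org/thredds/dodsC/ooi/"
        "user/20180606T232135-RS03AXPS-PC03A-4A-CTDPFA303-streamed/"
        "ancillary_file_without_designator.nc",
    ]
    filt_ds = list(
        filter(lambda x: any(x.count(ref) > 1 for ref in ref_degs), datasets)
    )
    assert len(filt_ds) == 1  # only the url containing the designator twice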
def fetch_xr(params, **kwargs):
    turl, ref_degs = params
    if kwargs.get("cloud_source"):
        filt_ds = get_nc_urls(
            turl,
            cloud_source=True,
            begin_date=kwargs.get("begin_date"),
            end_date=kwargs.get("end_date"),
        )
        # Clean up kwargs so only open_mfdataset arguments remain.
        kwargs.pop("begin_date")
        kwargs.pop("end_date")
        kwargs.pop("cloud_source")
    else:
        datasets = get_nc_urls(turl)
        # Only include instruments where the reference designator appears
        # twice (i.e. it was in the original filter).
        filt_ds = list(
            filter(lambda x: any(x.count(ref) > 1 for ref in ref_degs), datasets)
        )
    # TODO: Place some chunking here
    return xr.open_mfdataset(filt_ds, engine="netcdf4", **kwargs)
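# A hedged usage sketch for the cloud_source branch of fetch_xr; the url and
# date strings are hypothetical placeholders, not values from the original
# source. Note that begin_date, end_date, and cloud_source are popped above
# so that only open_mfdataset arguments reach xr.open_mfdataset.
def _fetch_xr_cloud_example():
    params = ("s3://example-bucket/ooi-data", [])  # hypothetical cloud url; the ref filter is unused here
    return fetch_xr(
        params,
        cloud_source=True,
        begin_date="2018-01-01",  # hypothetical date range
        end_date="2018-01-31",
    )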
def to_xarray(self, **kwargs):
    """ Retrieve the OOI streams data and export to xarray Datasets,
    saving them in memory.

    Args:
        **kwargs: Keyword arguments for xarray's open_mfdataset.

    Returns:
        list: List of xarray datasets

    """
    ref_degs = self._filtered_data_catalog["reference_designator"].values
    dataset_list = []
    if self._data_type == "netcdf":
        if not self._cloud_source:
            if self._raw_file_dict:
                # Echosounder raw files are converted to MVBS netCDF first.
                mvbsnc_list = perform_ek60_processing(self._raw_file_dict)
                for k, v in mvbsnc_list.items():
                    resdf = xr.open_mfdataset(
                        v,
                        concat_dim=["ping_time"],
                        combine="nested",
                        **kwargs,
                    )
                    resdf.attrs["id"] = k
                    dataset_list.append(resdf)
        turls = self._perform_check()
        if len(turls) > 0:
            # TODO: Cache netcdf urls so that there is no need to re-request data
            self._netcdf_urls = [get_nc_urls(turl) for turl in turls]
            logger.info("Acquiring data from opendap urls ...")
            jobs = [
                gevent.spawn(fetch_xr, (url, ref_degs), **kwargs)
                for url in turls
            ]
            gevent.joinall(jobs, timeout=300)
            for job in jobs:
                dataset_list.append(job.value)
    else:
        self._logger.warning(
            f"{self._data_type} cannot be converted to xarray dataset"
        )  # noqa
    if dataset_list:
        self._dataset_list = dataset_list
    return self._dataset_list
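# A hedged sketch of how to_xarray is typically driven. Only the method body
# is shown above, so the owning class and the search/request calls below are
# assumptions; treat every name here as hypothetical:
#
#   source = OOI()                       # hypothetical owning data source
#   source.search(...)                   # populates _filtered_data_catalog
#   source.request_data(...)             # populates thredds urls / raw files
#   datasets = source.to_xarray(chunks={"time": 1000})
#   for ds in datasets:
#       print(ds.attrs.get("id"), list(ds.data_vars))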
def test_get_nc_urls():
    thredds_url = (
        "https://opendap.oceanobservatories.org/thredds"
        "/catalog/ooi/[email protected]/20180606T232135"
        "-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_"
        "optode_sample/catalog.html"
    )
    dataset_urls = parser.get_nc_urls(thredds_url=thredds_url)
    result_test = [
        "https://opendap.oceanobservatories.org/thredds"
        "/dodsC/ooi/[email protected]/20180606T232135"
        "-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_"
        "optode_sample/deployment0004_RS03AXPS-PC03A-4A"
        "-CTDPFA303-streamed-ctdpf_optode_sample_20180101T000000."
        "596438-20180131T235959.815406.nc"
    ]
    assert isinstance(dataset_urls, list)
    assert dataset_urls == result_test
def fetch_xr(turl, **kwargs):
    # Simplest variant: open every dataset in the catalog without filtering
    # by reference designator.
    datasets = get_nc_urls(turl)
    return xr.open_mfdataset(
        datasets, preprocess=preprocess_ds, decode_times=False, **kwargs
    )
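# A minimal usage sketch for this simpler variant; the catalog url and the
# chunking choice are hypothetical. Because decode_times=False is set, time
# coordinates arrive as raw numbers, left to preprocess_ds or the caller to
# decode.
def _fetch_xr_example():
    turl = (
        "https://opendap.oceanobservatories.org/thredds/catalog/ooi/"
        "example-user/example-request/catalog.html"
    )  # hypothetical thredds catalog url
    return fetch_xr(turl, chunks={"time": 1000})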