Example #1
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    # start one day early so midnight values can be interpolated
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month,
                      start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' %
            (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                # retry indefinitely, sleeping 2 s between failed attempts
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi,
                                south=lat_lo,
                                east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d'
                            % (name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # map longitudes into the [0, 360) convention used by the GFS grid
    lon_points = lon_points % 360
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # these two fields are not in the 0.50-degree archive; fill with NaN so the
    # result keeps the same columns as the 0.25-degree product
    res[[
        'Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground'
    ]] = [[np.nan, np.nan]] * len(res)
    return res
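The snippet above depends on module-level context from its project; a minimal sketch of the imports and stand-ins it needs to run (CheckConnection, GFS_50_VAR_LIST, and logger are project-specific assumptions, not library names):

import logging
import time
import traceback
from datetime import datetime, timedelta

import numpy as np
import xarray as xr
from siphon.catalog import TDSCatalog
from xarray.backends import NetCDF4DataStore

logger = logging.getLogger(__name__)

# illustrative variable list; the project defines its own
GFS_50_VAR_LIST = ['Temperature_surface', 'Pressure_surface']


class CheckConnection:
    """Stand-in for the project's connectivity helper."""

    @staticmethod
    def set_url(url):
        pass

    @staticmethod
    def is_online():
        pass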
Example #2
def get_goes_image(date=None, channel=8, region='CONUS'):
    """Return dataset of GOES-16 data."""
    # avoid a stale default: datetime.utcnow() as a default argument is
    # evaluated once at import time, not on every call
    if date is None:
        date = datetime.utcnow()
    cat = TDSCatalog(
        'https://thredds.ucar.edu/thredds/catalog/satellite/goes/east/products/'
        'CloudAndMoistureImagery/{}/Channel{:02d}/{:%Y%m%d}/'
        'catalog.xml'.format(region, channel, date))

    ds = cat.datasets[-1]  # Get most recent dataset
    ds = ds.remote_access(service='OPENDAP')
    ds = NetCDF4DataStore(ds)
    ds = xr.open_dataset(ds)
    return ds
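A hedged usage sketch for the function above; the plotting step is illustrative, and the variable name Sectorized_CMI is borrowed from Example #5's GOES-16 catalog rather than guaranteed for this one:

import matplotlib.pyplot as plt

# latest CONUS image on channel 9 (mid-level water vapor)
ds = get_goes_image(channel=9)
ds['Sectorized_CMI'].plot(cmap='gray_r')  # assumes this variable is present
plt.show()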
Example #3
    def get(self):
        """
        Request a subset of the dataset configured on this instance
        (``self.URL``, ``self.dataset``, ``self.variables``).

        :return: a subset of the netCDF4 dataset for the hard-coded
                 bounding box and a 34-hour time window starting now
        """
        ###########################################
        # First we construct a TDSCatalog instance using the url
        gfs_catalog = TDSCatalog(self.URL)

        # We see this catalog contains three datasets.
        # print(gfs_catalog.datasets)

        gfs_subset = gfs_catalog.datasets[self.dataset].subset()

        ###########################################
        # Define sub_point to proceed with the query
        query = gfs_subset.query()

        ###########################################
        # Then we construct a query asking for data in the desired bounding box
        # and time interval. We also ask for NetCDF-4 output and choose the
        # variables. This request returns all vertical levels for the box over
        # the time interval. Note the string representation of the query is a
        # properly encoded query string.
        # lonlat_box(north=..., south=..., east=..., west=...)
        query.lonlat_box(north=0, south=-40, east=-25, west=-70)
        now = dt.datetime.utcnow()
        query.time_range(now, now + dt.timedelta(hours=34))
        query.accept('netcdf4')

        ###########################################
        # We'll be pulling out the variables we want to use in the future,
        # as well as the values of pressure levels.
        # To get the name of the correct variable, look at the 'variables'
        # attribute on the subset object. The last of the variables listed in
        # 'coordinates' is the pressure dimension.
        # print(gfs_subset.variables)

        query.variables(*self.variables)

        ###########################################
        # We now request data from the server using this query.
        start_time = time.time()

        raw_data = gfs_subset.get_data(query)

        elapsed_time = time.time() - start_time
        print(f'Process done in {elapsed_time} seconds')

        # We need the datastore so that we can open the existing netcdf dataset we downloaded
        dataset = xr.open_dataset(NetCDF4DataStore(raw_data))
        return dataset
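The method reads its configuration from the instance; a sketch of a hypothetical host class (the URL and dataset name below are assumptions chosen to match the Unidata THREDDS GFS catalog, not taken from the source):

import datetime as dt
import time

import xarray as xr
from siphon.catalog import TDSCatalog
from xarray.backends import NetCDF4DataStore


class GFSSubsetter:
    """Hypothetical host class; attribute names match what get() reads."""

    URL = ('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/'
           'Global_0p25deg/catalog.xml')                      # assumed catalog
    dataset = 'Best GFS Quarter Degree Forecast Time Series'  # assumed key
    variables = ('Temperature_isobaric',)                     # assumed list

    # def get(self): ...  (body as shown above)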
Example #4
File: local.py  Project: acorlyon/cate-core
    def _make_local(self,
                    local_ds: 'LocalDataSource',
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        # local_name = local_ds.name
        local_id = local_ds.name

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(
            var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = True if compression_level > 0 else False

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        data_store_path = local_ds.data_store.data_store_path
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        monitor.start("Sync " + self.name, total_work=len(self._files.items()))
        for remote_relative_filepath, coverage in self._files.items():
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(remote_relative_filepath)
            local_relative_filepath = os.path.join(local_id, file_name)
            local_absolute_filepath = os.path.join(data_store_path,
                                                   local_relative_filepath)

            remote_absolute_filepath = os.path.join(
                self._data_store.data_store_path, remote_relative_filepath)

            if isinstance(coverage, Tuple):

                time_coverage_start = coverage[0]
                time_coverage_end = coverage[1]

                remote_netcdf = None
                local_netcdf = None
                if not time_range or (time_coverage_start >= time_range[0]
                                      and time_coverage_end <= time_range[1]):
                    if region or var_names:
                        try:
                            remote_netcdf = NetCDF4DataStore(
                                remote_absolute_filepath)

                            local_netcdf = NetCDF4DataStore(
                                local_absolute_filepath,
                                mode='w',
                                persist=True)
                            local_netcdf.set_attributes(
                                remote_netcdf.get_attrs())

                            remote_dataset = xr.Dataset.load_store(
                                remote_netcdf)

                            process_region = False
                            if region:
                                geo_lat_min = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lat_min')
                                geo_lat_max = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lat_max')
                                geo_lon_min = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lon_min')
                                geo_lon_max = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lon_max')

                                geo_lat_res = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs,
                                    'geospatial_lat_resolution')
                                geo_lon_res = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs,
                                    'geospatial_lon_resolution')
                                if not (isnan(geo_lat_min)
                                        or isnan(geo_lat_max)
                                        or isnan(geo_lon_min)
                                        or isnan(geo_lon_max)
                                        or isnan(geo_lat_res)
                                        or isnan(geo_lon_res)):
                                    process_region = True

                                    # shapely .bounds is (minx, miny, maxx, maxy)
                                    [lon_min, lat_min, lon_max,
                                     lat_max] = region.bounds

                                    lat_min = floor(
                                        (lat_min - geo_lat_min) / geo_lat_res)
                                    lat_max = ceil(
                                        (lat_max - geo_lat_min) / geo_lat_res)
                                    lon_min = floor(
                                        (lon_min - geo_lon_min) / geo_lon_res)
                                    lon_max = ceil(
                                        (lon_max - geo_lon_min) / geo_lon_res)

                                    # TODO (kbernat): check why dataset.sel fails!
                                    remote_dataset = remote_dataset.isel(
                                        drop=False,
                                        lat=slice(lat_min, lat_max),
                                        lon=slice(lon_min, lon_max))

                                    geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                                    geo_lat_min += lat_min * geo_lat_res
                                    geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                                    geo_lon_min += lon_min * geo_lon_res

                            if not var_names:
                                var_names = [
                                    var_name for var_name in
                                    remote_netcdf.variables.keys()
                                ]
                            var_names.extend([
                                coord_name
                                for coord_name in remote_dataset.coords.keys()
                                if coord_name not in var_names
                            ])
                            child_monitor.start(label=file_name,
                                                total_work=len(var_names))
                            for sel_var_name in var_names:
                                var_dataset = remote_dataset.drop([
                                    var_name for var_name in
                                    remote_dataset.variables.keys()
                                    if var_name != sel_var_name
                                ])
                                if compression_enabled:
                                    var_dataset.variables.get(
                                        sel_var_name).encoding.update(
                                            encoding_update)
                                local_netcdf.store_dataset(var_dataset)
                                child_monitor.progress(work=1,
                                                       msg=sel_var_name)
                            if process_region:
                                local_netcdf.set_attribute(
                                    'geospatial_lat_min', geo_lat_min)
                                local_netcdf.set_attribute(
                                    'geospatial_lat_max', geo_lat_max)
                                local_netcdf.set_attribute(
                                    'geospatial_lon_min', geo_lon_min)
                                local_netcdf.set_attribute(
                                    'geospatial_lon_max', geo_lon_max)
                        finally:
                            if remote_netcdf:
                                remote_netcdf.close()
                            if local_netcdf:
                                local_netcdf.close()
                                local_ds.add_dataset(
                                    local_relative_filepath,
                                    (time_coverage_start, time_coverage_end))
                        child_monitor.done()
                    else:
                        shutil.copy(remote_absolute_filepath,
                                    local_absolute_filepath)
                        local_ds.add_dataset(
                            local_relative_filepath,
                            (time_coverage_start, time_coverage_end))
                        child_monitor.done()
        monitor.done()
        return local_id
Example #5

savelocation = '/home/scarani/Desktop/output/goes/' + sector + '/'

baseurl = 'http://thredds-test.unidata.ucar.edu/thredds/catalog/satellite/goes16/GOES16/'
cat = TDSCatalog(baseurl + str(sector) + '/Channel' + str(channel) + '/' + str(date) + 
                 '/catalog.xml')
# pick one dataset from the catalog (a negative index counts back from the end)
ds = cat.datasets[-120]
# projections that appear in this catalog: 'Mercator', 'Fixed Grid'

ds = ds.remote_access(service='OPENDAP')
ds = NetCDF4DataStore(ds)
ds = xr.open_dataset(ds)
print(ds.projection)
timestamp = datetime.strptime(ds.start_date_time, '%Y%j%H%M%S')
data_var = ds.metpy.parse_cf('Sectorized_CMI')

x = ds['x']
y = ds['y']
z = data_var[:]

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection=data_var.metpy.cartopy_crs)
# pull scalar extents out of the coordinate DataArrays
bounds = (x.min().item(), x.max().item(), y.min().item(), y.max().item())

#colormap = 'magma_r'
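The snippet stops before anything is drawn; a minimal completion under the same assumptions (the commented-out colormap hints at the intent, and the title and file-name format here are illustrative):

# hypothetical completion: render the sector and save it to savelocation
import os

im = ax.imshow(z, extent=bounds, origin='upper', cmap='magma_r')
ax.set_title('GOES-16 Channel %s  %s' % (channel, timestamp))
fig.colorbar(im, ax=ax, shrink=0.6)
fig.savefig(os.path.join(savelocation, timestamp.strftime('%Y%m%d%H%M%S') + '.png'),
            bbox_inches='tight')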
Example #6
    def _make_local(self,
                    local_ds: LocalDataSource,
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        # local_name = local_ds.name
        local_id = local_ds.name

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(
            var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = True if compression_level > 0 else False

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

        if region or var_names:
            protocol = _ODP_PROTOCOL_OPENDAP
        else:
            protocol = _ODP_PROTOCOL_HTTP

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        selected_file_list = self._find_files(time_range)

        if protocol == _ODP_PROTOCOL_OPENDAP:

            files = self._get_urls_list(selected_file_list, protocol)
            monitor.start('Sync ' + self.name, total_work=len(files))
            for idx, dataset_uri in enumerate(files):
                child_monitor = monitor.child(work=1)

                file_name = os.path.basename(dataset_uri)
                local_filepath = os.path.join(local_path, file_name)

                time_coverage_start = selected_file_list[idx][1]
                time_coverage_end = selected_file_list[idx][2]

                remote_netcdf = None
                local_netcdf = None
                try:
                    remote_netcdf = NetCDF4DataStore(dataset_uri)

                    local_netcdf = NetCDF4DataStore(local_filepath,
                                                    mode='w',
                                                    persist=True)
                    local_netcdf.set_attributes(remote_netcdf.get_attrs())

                    remote_dataset = xr.Dataset.load_store(remote_netcdf)

                    process_region = False
                    if region:
                        geo_lat_min = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lat_min')
                        geo_lat_max = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lat_max')
                        geo_lon_min = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lon_min')
                        geo_lon_max = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lon_max')

                        geo_lat_res = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lat_resolution')
                        geo_lon_res = self._get_harmonized_coordinate_value(
                            remote_dataset.attrs, 'geospatial_lon_resolution')
                        if not (isnan(geo_lat_min) or isnan(geo_lat_max)
                                or isnan(geo_lon_min) or isnan(geo_lon_max)
                                or isnan(geo_lat_res) or isnan(geo_lon_res)):
                            process_region = True

                            # shapely .bounds is (minx, miny, maxx, maxy)
                            [lon_min, lat_min, lon_max,
                             lat_max] = region.bounds

                            lat_min = floor(
                                (lat_min - geo_lat_min) / geo_lat_res)
                            lat_max = ceil(
                                (lat_max - geo_lat_min) / geo_lat_res)
                            lon_min = floor(
                                (lon_min - geo_lon_min) / geo_lon_res)
                            lon_max = ceil(
                                (lon_max - geo_lon_min) / geo_lon_res)

                            # TODO (kbernat): check why dataset.sel fails!
                            remote_dataset = remote_dataset.isel(
                                drop=False,
                                lat=slice(lat_min, lat_max),
                                lon=slice(lon_min, lon_max))

                            geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                            geo_lat_min += lat_min * geo_lat_res
                            geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                            geo_lon_min += lon_min * geo_lon_res

                    if not var_names:
                        var_names = [
                            var_name
                            for var_name in remote_netcdf.variables.keys()
                        ]
                    var_names.extend([
                        coord_name
                        for coord_name in remote_dataset.coords.keys()
                        if coord_name not in var_names
                    ])
                    child_monitor.start(label=file_name,
                                        total_work=len(var_names))
                    for sel_var_name in var_names:
                        var_dataset = remote_dataset.drop([
                            var_name
                            for var_name in remote_dataset.variables.keys()
                            if var_name != sel_var_name
                        ])
                        if compression_enabled:
                            var_dataset.variables.get(
                                sel_var_name).encoding.update(encoding_update)
                        local_netcdf.store_dataset(var_dataset)
                        child_monitor.progress(work=1, msg=sel_var_name)
                    if process_region:
                        local_netcdf.set_attribute('geospatial_lat_min',
                                                   geo_lat_min)
                        local_netcdf.set_attribute('geospatial_lat_max',
                                                   geo_lat_max)
                        local_netcdf.set_attribute('geospatial_lon_min',
                                                   geo_lon_min)
                        local_netcdf.set_attribute('geospatial_lon_max',
                                                   geo_lon_max)

                finally:
                    if remote_netcdf:
                        remote_netcdf.close()
                    if local_netcdf:
                        local_netcdf.close()
                        local_ds.add_dataset(
                            os.path.join(local_id, file_name),
                            (time_coverage_start, time_coverage_end))

                child_monitor.done()
        else:
            outdated_file_list = []
            for file_rec in selected_file_list:
                filename, _, _, file_size, url = file_rec
                dataset_file = os.path.join(local_path, filename)
                # todo (forman, 20160915): must perform better checks on dataset_file if it is...
                # ... outdated or incomplete or corrupted.
                # JSON also includes "checksum" and "checksum_type" fields.
                if not os.path.isfile(dataset_file) or (
                        file_size
                        and os.path.getsize(dataset_file) != file_size):
                    outdated_file_list.append(file_rec)

            if outdated_file_list:
                with monitor.starting('Sync ' + self.name,
                                      len(outdated_file_list)):
                    bytes_to_download = sum(
                        [file_rec[3] for file_rec in outdated_file_list])
                    dl_stat = _DownloadStatistics(bytes_to_download)

                    file_number = 1

                    for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                        if monitor.is_cancelled():
                            raise InterruptedError
                        dataset_file = os.path.join(local_path, filename)
                        sub_monitor = monitor.child(work=1.0)

                        # noinspection PyUnusedLocal
                        def reporthook(block_number, read_size,
                                       total_file_size):
                            dl_stat.handle_chunk(read_size)
                            if monitor.is_cancelled():
                                raise InterruptedError
                            sub_monitor.progress(work=read_size,
                                                 msg=str(dl_stat))

                        sub_monitor_msg = "file %d of %d" % (
                            file_number, len(outdated_file_list))
                        with sub_monitor.starting(sub_monitor_msg, file_size):
                            urllib.request.urlretrieve(url[protocol],
                                                       filename=dataset_file,
                                                       reporthook=reporthook)
                        file_number += 1
                        local_ds.add_dataset(os.path.join(local_id, filename),
                                             (coverage_from, coverage_to))
        local_ds.save()
        monitor.done()
Example #7
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
            lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    # start one day early so midnight values can be interpolated
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    # consider the supported time range
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                          time_points, lat_points, lon_points)
    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    # RDA requires HTTP basic authentication
    http_util.session_manager.set_session_options(auth=(config['UN_RDA'],
                                                        config['PW_RDA']))
    # fetch the prior day's 18z +6h file so midnight values can be interpolated
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                           (base_url, start_date.year, start_date.year,
                            start_date.month, start_date.day))
    ds_subset = start_cat.datasets['gfs.0p25.%s%.2d%.2d18.f006.grib2' %
                                   (start_date.year, start_date.month,
                                    start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi,
        west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)

    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month,
                            date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                             (base_url, end_date.year, end_date.year,
                              end_date.month, end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in list(end_cat.datasets):
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi,
                        west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # map longitudes into the [0, 360) convention used by the GFS grid
    lon_points = lon_points % 360
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points,
                         latitude=lat_points,
                         time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
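A hedged invocation sketch; config with RDA credentials, GFS_25_VAR_LIST, http_util, and CheckConnection are project-specific (compare the stubs sketched after Example #1), and the coordinates below are made up:

from datetime import datetime

import numpy as np
import pandas as pd

# one day of weather along a short, hypothetical ship track
times = pd.date_range('2020-06-01 00:00', periods=4, freq='6h').to_numpy()
lats = np.array([40.0, 40.5, 41.0, 41.5])
lons = np.array([-70.0, -69.5, -69.0, -68.5])

df = get_GFS(datetime(2020, 6, 1), datetime(2020, 6, 1),
             lat_lo=39.0, lat_hi=43.0, lon_lo=-71.0, lon_hi=-67.0,
             time_points=times, lat_points=lats, lon_points=lons)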
Example #8
    def _make_local(self,
                    local_ds: LocalDataSource,
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id
        time_range = TimeRangeLike.convert(time_range)
        region = PolygonLike.convert(region)
        var_names = VarNamesLike.convert(var_names)

        time_range, region, var_names = self._apply_make_local_fixes(time_range, region, var_names)

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
        compression_enabled = True if compression_level > 0 else False

        do_update_of_verified_time_coverage_start_once = True
        verified_time_coverage_start = None
        verified_time_coverage_end = None

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({'zlib': True, 'complevel': compression_level})

        if region or var_names:
            protocol = _ODP_PROTOCOL_OPENDAP
        else:
            protocol = _ODP_PROTOCOL_HTTP

        local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'Open Data Portal\'s data source \'{}\' does not seem to have any data sets'.format(self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
            raise DataAccessError(None, msg)
        try:
            if protocol == _ODP_PROTOCOL_OPENDAP:

                do_update_of_variables_meta_info_once = True
                do_update_of_region_meta_info_once = True

                files = self._get_urls_list(selected_file_list, protocol)
                monitor.start('Sync ' + self.id, total_work=len(files))
                for idx, dataset_uri in enumerate(files):
                    child_monitor = monitor.child(work=1)

                    file_name = os.path.basename(dataset_uri)
                    local_filepath = os.path.join(local_path, file_name)

                    time_coverage_start = selected_file_list[idx][1]
                    time_coverage_end = selected_file_list[idx][2]

                    remote_netcdf = None
                    local_netcdf = None
                    try:
                        child_monitor.start(label=file_name, total_work=1)
                        remote_netcdf = NetCDF4DataStore(dataset_uri)

                        local_netcdf = NetCDF4DataStore(local_filepath, mode='w', persist=True)
                        local_netcdf.set_attributes(remote_netcdf.get_attrs())

                        remote_dataset = xr.Dataset.load_store(remote_netcdf)

                        if var_names:
                            remote_dataset = remote_dataset.drop(
                                [var_name for var_name in remote_dataset.data_vars.keys()
                                 if var_name not in var_names])

                        if region:
                            remote_dataset = subset_spatial_impl(remote_dataset, region)
                            geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                            local_netcdf.set_attribute('geospatial_lat_min', geo_lat_min)
                            local_netcdf.set_attribute('geospatial_lat_max', geo_lat_max)
                            local_netcdf.set_attribute('geospatial_lon_min', geo_lon_min)
                            local_netcdf.set_attribute('geospatial_lon_max', geo_lon_max)
                            if do_update_of_region_meta_info_once:
                                local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                local_ds.meta_info['bbox_minx'] = geo_lon_min
                                local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                local_ds.meta_info['bbox_miny'] = geo_lat_min
                                do_update_of_region_meta_info_once = False

                        if compression_enabled:
                            for sel_var_name in remote_dataset.variables.keys():
                                remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                        local_netcdf.store_dataset(remote_dataset)

                        child_monitor.progress(work=1, msg=str(time_coverage_start))
                    finally:
                        if remote_netcdf:
                            remote_netcdf.close()
                        # local_netcdf may still be None if opening it failed
                        if local_netcdf and do_update_of_variables_meta_info_once:
                            variables_info = local_ds.meta_info.get('variables', [])
                            local_ds.meta_info['variables'] = [var_info for var_info in variables_info
                                                               if var_info.get('name')
                                                               in local_netcdf.variables.keys() and
                                                               var_info.get('name')
                                                               not in local_netcdf.dimensions.keys()]
                            do_update_of_variables_meta_info_once = False
                        if local_netcdf:
                            local_netcdf.close()
                            local_ds.add_dataset(os.path.join(local_id, file_name),
                                                 (time_coverage_start, time_coverage_end))

                            if do_update_of_verified_time_coverage_start_once:
                                verified_time_coverage_start = time_coverage_start
                                do_update_of_verified_time_coverage_start_once = False
                            verified_time_coverage_end = time_coverage_end
                    child_monitor.done()
            else:
                outdated_file_list = []
                for file_rec in selected_file_list:
                    filename, _, _, file_size, url = file_rec
                    dataset_file = os.path.join(local_path, filename)
                    # todo (forman, 20160915): must perform better checks on dataset_file if it is...
                    # ... outdated or incomplete or corrupted.
                    # JSON also includes "checksum" and "checksum_type" fields.
                    if not os.path.isfile(dataset_file) or (file_size and os.path.getsize(dataset_file) != file_size):
                        outdated_file_list.append(file_rec)

                if outdated_file_list:
                    with monitor.starting('Sync ' + self.id, len(outdated_file_list)):
                        bytes_to_download = sum([file_rec[3] for file_rec in outdated_file_list])
                        dl_stat = _DownloadStatistics(bytes_to_download)

                        file_number = 1

                        for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                            dataset_file = os.path.join(local_path, filename)
                            sub_monitor = monitor.child(work=1.0)

                            # noinspection PyUnusedLocal
                            def reporthook(block_number, read_size, total_file_size):
                                dl_stat.handle_chunk(read_size)
                                sub_monitor.progress(work=read_size, msg=str(dl_stat))

                            sub_monitor_msg = "file %d of %d" % (file_number, len(outdated_file_list))
                            with sub_monitor.starting(sub_monitor_msg, file_size):
                                urllib.request.urlretrieve(url[protocol], filename=dataset_file, reporthook=reporthook)
                            file_number += 1
                            local_ds.add_dataset(os.path.join(local_id, filename), (coverage_from, coverage_to))

                            if do_update_of_verified_time_coverage_start_once:
                                verified_time_coverage_start = coverage_from
                                do_update_of_verified_time_coverage_start_once = False
                            verified_time_coverage_end = coverage_to
        except OSError as error:
            raise DataAccessError(self, "Copying remote data source failed: {}".format(error))
        local_ds.meta_info['temporal_coverage_start'] = TimeLike.format(verified_time_coverage_start)
        local_ds.meta_info['temporal_coverage_end'] = TimeLike.format(verified_time_coverage_end)
        local_ds.save(True)
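Examples #4, #6, and #8 all revolve around the same open/copy/close pattern for NetCDF4DataStore; a stripped-down sketch of that core idea, using the store methods as they appear in the examples (paths are illustrative, and newer xarray releases may expose a different store API):

import xarray as xr
from xarray.backends import NetCDF4DataStore

remote_netcdf = None
local_netcdf = None
try:
    # the source can be a local path or an OPeNDAP URL
    remote_netcdf = NetCDF4DataStore('remote_file.nc')  # illustrative path
    local_netcdf = NetCDF4DataStore('local_copy.nc', mode='w', persist=True)
    local_netcdf.set_attributes(remote_netcdf.get_attrs())
    dataset = xr.Dataset.load_store(remote_netcdf)
    # subset however is appropriate, then write through the local store
    local_netcdf.store_dataset(dataset.isel(lat=slice(0, 10), lon=slice(0, 10)))
finally:
    # close both stores even if anything above failed
    if remote_netcdf:
        remote_netcdf.close()
    if local_netcdf:
        local_netcdf.close()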