def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    """Download GFS 0.50-degree analysis data and interpolate it to points.

    Pulls `gfsanl_4` GRIB2 files from the NCEI THREDDS server for every day in
    [date_lo - 1 day, date_hi] (the extra day allows interpolation across the
    first midnight), subsets them to the given lat/lon box, combines them into
    one dataset and interpolates to the requested time/lat/lon points.

    :param date_lo: start datetime of the requested period
    :param date_hi: end datetime of the requested period
    :param lat_lo: southern latitude bound
    :param lat_hi: northern latitude bound
    :param lon_lo: western longitude bound
    :param lon_hi: eastern longitude bound
    :param time_points: time coordinates to interpolate to
    :param lat_points: latitude coordinates to interpolate to
    :param lon_points: longitude coordinates to interpolate to
        (assumed to be in [-180, 180] — TODO confirm against callers)
    :return: pandas DataFrame with the GFS_50_VAR_LIST columns
    """
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
            str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')
    x_arr_list = []
    # Start one day early so midnight of date_lo can be interpolated.
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month, start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' % (base_url, dt.year, dt.month,
                                                 dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                # Retry indefinitely on network errors; a missing file is only
                # logged and skipped (the `break` below ends the retry loop in
                # both the found and not-found cases).
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi, south=lat_lo, east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            # Some files name the time coordinate 'time1'.
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d' % (
                                name, attempts))
                        time.sleep(2)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # Convert longitudes from [-180, 180] to the dataset's [0, 360) convention.
    # BUG FIX: the previous expression `((lon_points + 180) % 360) + 180`
    # mapped eastern-hemisphere longitudes (0..180) above 360, outside the
    # grid, so interpolation returned NaN for them.
    lon_points = lon_points % 360
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # These two columns are overwritten with NaN — presumably unavailable or
    # unreliable in the 0.50-degree product; keeps the schema aligned with the
    # 0.25-degree variant (TODO confirm).
    res[[
        'Wind_speed_gust_surface',
        'Dewpoint_temperature_height_above_ground'
    ]] = [[np.nan, np.nan]] * len(res)
    return res
def get_goes_image(date=None, channel=8, region='CONUS'):
    """Return dataset of GOES-16 data.

    :param date: datetime used to select the catalog day; defaults to the
        current UTC time at call time
    :param channel: GOES-16 ABI channel number
    :param region: product region string (e.g. 'CONUS')
    :return: xarray Dataset opened over OPENDAP for the most recent file
    """
    # BUG FIX: the original signature used `date=datetime.utcnow()`, which is
    # evaluated once at import time, so every defaulted call reused the
    # module-load timestamp instead of "now".
    if date is None:
        date = datetime.utcnow()
    cat = TDSCatalog(
        'https://thredds.ucar.edu/thredds/catalog/satellite/goes/east/products/'
        'CloudAndMoistureImagery/{}/Channel{:02d}/{:%Y%m%d}/'
        'catalog.xml'.format(region, channel, date))
    ds = cat.datasets[-1]  # Get most recent dataset
    ds = ds.remote_access(service='OPENDAP')
    ds = NetCDF4DataStore(ds)
    ds = xr.open_dataset(ds)
    return ds
def get(self):
    """Fetch a spatial/temporal subset of the configured GFS dataset.

    Builds an NCSS query against ``self.URL`` for the catalog entry
    ``self.dataset``, restricted to a fixed lon/lat box (70W-25W, 40S-0N),
    a 34-hour window starting now, and the variables in ``self.variables``,
    then downloads the result and returns it as an xarray Dataset.
    """
    # Resolve the THREDDS catalog and attach to the subset (NCSS) service
    # of the configured dataset.
    catalog = TDSCatalog(self.URL)
    subset_service = catalog.datasets[self.dataset].subset()

    # Assemble the query: bounding box uses lonlat_box(west, east, south,
    # north) semantics, output format is NetCDF4, and only the configured
    # variables are requested. All vertical levels come back for the box.
    query = subset_service.query()
    query.lonlat_box(north=0, south=-40, east=-25, west=-70)
    now = dt.datetime.utcnow()
    query.time_range(now, now + dt.timedelta(hours=34))
    query.accept('netcdf4')
    query.variables(*self.variables)

    # Perform the download and report how long it took.
    start_time = time.time()
    raw_data = subset_service.get_data(query)
    elapsed_time = time.time() - start_time
    print(f'Process done in {elapsed_time} seconds')

    # Wrap the returned netCDF4 object so xarray can open it directly.
    return xr.open_dataset(NetCDF4DataStore(raw_data))
def _make_local(self,
                local_ds: 'LocalDataSource',
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    """Create a local copy of this data source's files under ``local_ds``.

    Files whose time coverage intersects ``time_range`` are either plain-copied
    (no region/variable filtering requested) or re-written file-by-file via
    NetCDF4 stores with only the selected region/variables.

    :param local_ds: the local data source receiving the copied datasets
    :param time_range: optional time filter; files outside it are skipped
    :param region: optional spatial filter (polygon); its bounding box is used
    :param var_names: optional list of variable names to keep
    :param monitor: progress monitor
    :return: the name (id) of the local data source
    """
    # local_name = local_ds.name
    local_id = local_ds.name
    # Normalise the optional filters; None/empty means "no restriction".
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(
        var_names) if var_names else None  # type: Sequence
    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                         NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False
    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({
            'zlib': True,
            'complevel': compression_level
        })
    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    data_store_path = local_ds.data_store.data_store_path
    if not os.path.exists(local_path):
        os.makedirs(local_path)
    monitor.start("Sync " + self.name, total_work=len(self._files.items()))
    for remote_relative_filepath, coverage in self._files.items():
        child_monitor = monitor.child(work=1)
        file_name = os.path.basename(remote_relative_filepath)
        local_relative_filepath = os.path.join(local_id, file_name)
        local_absolute_filepath = os.path.join(data_store_path,
                                               local_relative_filepath)
        remote_absolute_filepath = os.path.join(
            self._data_store.data_store_path, remote_relative_filepath)
        # NOTE(review): if `coverage` is not a tuple the file is silently
        # skipped and its child monitor never completes — confirm intended.
        if isinstance(coverage, Tuple):
            time_coverage_start = coverage[0]
            time_coverage_end = coverage[1]
            remote_netcdf = None
            local_netcdf = None
            # Keep the file when no time filter is set, or when its coverage
            # lies entirely inside the requested range ('or' binds looser
            # than 'and' here, which gives the intended grouping).
            if not time_range or time_coverage_start >= time_range[
                    0] and time_coverage_end <= time_range[1]:
                if region or var_names:
                    # Filtered copy: stream the remote file through xarray
                    # into a freshly written local NetCDF file.
                    try:
                        remote_netcdf = NetCDF4DataStore(
                            remote_absolute_filepath)
                        local_netcdf = NetCDF4DataStore(
                            local_absolute_filepath, mode='w', persist=True)
                        local_netcdf.set_attributes(remote_netcdf.get_attrs())
                        remote_dataset = xr.Dataset.load_store(remote_netcdf)
                        process_region = False
                        if region:
                            # Derive the index window for the region from the
                            # geospatial_* global attributes.
                            geo_lat_min = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lat_min')
                            geo_lat_max = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lat_max')
                            geo_lon_min = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lon_min')
                            geo_lon_max = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lon_max')
                            # NOTE(review): lat resolution is read from the
                            # *lon* attribute and vice versa — looks swapped;
                            # harmless only if both resolutions are equal.
                            # Confirm against the attribute writer.
                            geo_lat_res = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lon_resolution')
                            geo_lon_res = self._get_harmonized_coordinate_value(
                                remote_dataset.attrs, 'geospatial_lat_resolution')
                            if not (isnan(geo_lat_min) or isnan(geo_lat_max)
                                    or isnan(geo_lon_min) or isnan(geo_lon_max)
                                    or isnan(geo_lat_res) or isnan(geo_lon_res)):
                                process_region = True
                                [lat_min, lon_min, lat_max, lon_max] = region.bounds
                                # Convert degree bounds to index bounds,
                                # widening outward (floor/ceil).
                                lat_min = floor(
                                    (lat_min - geo_lat_min) / geo_lat_res)
                                lat_max = ceil(
                                    (lat_max - geo_lat_min) / geo_lat_res)
                                lon_min = floor(
                                    (lon_min - geo_lon_min) / geo_lon_res)
                                lon_max = ceil(
                                    (lon_max - geo_lon_min) / geo_lon_res)
                                # TODO (kbernat): check why dataset.sel fails!
                                remote_dataset = remote_dataset.isel(
                                    drop=False,
                                    lat=slice(lat_min, lat_max),
                                    lon=slice(lon_min, lon_max))
                                # Recompute the geographic bounds of the
                                # cropped grid for the output attributes.
                                geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                                geo_lat_min += lat_min * geo_lat_res
                                geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                                geo_lon_min += lon_min * geo_lon_res
                        # Default to all variables; coordinates are always
                        # carried along. NOTE(review): `var_names` persists
                        # (and grows) across loop iterations.
                        if not var_names:
                            var_names = [
                                var_name for var_name in
                                remote_netcdf.variables.keys()
                            ]
                        var_names.extend([
                            coord_name
                            for coord_name in remote_dataset.coords.keys()
                            if coord_name not in var_names
                        ])
                        child_monitor.start(label=file_name,
                                            total_work=len(var_names))
                        # Write one variable at a time to bound memory use.
                        for sel_var_name in var_names:
                            var_dataset = remote_dataset.drop([
                                var_name for var_name in
                                remote_dataset.variables.keys()
                                if var_name != sel_var_name
                            ])
                            if compression_enabled:
                                var_dataset.variables.get(
                                    sel_var_name).encoding.update(
                                        encoding_update)
                            local_netcdf.store_dataset(var_dataset)
                            child_monitor.progress(work=1, msg=sel_var_name)
                        if process_region:
                            local_netcdf.set_attribute(
                                'geospatial_lat_min', geo_lat_min)
                            local_netcdf.set_attribute(
                                'geospatial_lat_max', geo_lat_max)
                            local_netcdf.set_attribute(
                                'geospatial_lon_min', geo_lon_min)
                            local_netcdf.set_attribute(
                                'geospatial_lon_max', geo_lon_max)
                    finally:
                        # Always release both stores, even on failure.
                        if remote_netcdf:
                            remote_netcdf.close()
                        if local_netcdf:
                            local_netcdf.close()
                    local_ds.add_dataset(
                        local_relative_filepath,
                        (time_coverage_start, time_coverage_end))
                    child_monitor.done()
                else:
                    # No filtering requested: a plain file copy suffices.
                    shutil.copy(remote_absolute_filepath,
                                local_absolute_filepath)
                    local_ds.add_dataset(
                        local_relative_filepath,
                        (time_coverage_start, time_coverage_end))
                    child_monitor.done()
    monitor.done()
    return local_id
# Top-level plotting script for a GOES-16 sector image.
# Assumes `sector`, `channel` and `date` are defined earlier in the file
# (not visible here) — TODO confirm.
savelocation = '/home/scarani/Desktop/output/goes/' + sector + '/'
baseurl = 'http://thredds-test.unidata.ucar.edu/thredds/catalog/satellite/goes16/GOES16/'
cat = TDSCatalog(baseurl + str(sector) + '/Channel' + str(channel) + '/' +
                 str(date) + '/catalog.xml')
data = cat.datasets
# Pick a dataset 120 entries from the end of the catalog listing.
ds = cat.datasets[-120]
# NOTE(review): `data` is assigned twice and never used afterwards in this
# fragment — looks like leftover exploration code.
data = cat.datasets
# 'Mercator'
# 'Fixed Grid'
# Open the selected dataset over OPENDAP via xarray.
ds = ds.remote_access(service='OPENDAP')
ds = NetCDF4DataStore(ds)
ds = xr.open_dataset(ds)
print(ds.projection)
# start_date_time is formatted as year + day-of-year + HHMMSS.
timestamp = datetime.strptime(ds.start_date_time, '%Y%j%H%M%S')
data_var = ds.metpy.parse_cf('Sectorized_CMI')
x = ds['x']
y = ds['y']
z = data_var[:]
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection=data_var.metpy.cartopy_crs)
#bounds = (x.min(), x.max(), y.min(), y.max())
# .values.sum() on a 0-d min/max result extracts a plain scalar.
bounds = (x.min().values.sum(), x.max().values.sum(), y.min().values.sum(),
          y.max().values.sum())
#colormap = 'magma_r'
def _make_local(self,
                local_ds: LocalDataSource,
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    """Create a local copy of this ODP data source under ``local_ds``.

    Uses OPENDAP (per-variable filtered rewrite) when a region or variable
    subset is requested, otherwise downloads whole files over HTTP, skipping
    files already present with the expected size.

    :param local_ds: the local data source receiving the datasets
    :param time_range: optional time filter used to select remote files
    :param region: optional spatial filter (polygon); its bounding box is used
    :param var_names: optional list of variable names to keep
    :param monitor: progress monitor; supports cancellation on the HTTP path
    """
    # local_name = local_ds.name
    local_id = local_ds.name
    # Normalise the optional filters; None/empty means "no restriction".
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(
        var_names) if var_names else None  # type: Sequence
    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                         NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False
    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({
            'zlib': True,
            'complevel': compression_level
        })
    # Filtering requires OPENDAP access; otherwise plain HTTP download.
    if region or var_names:
        protocol = _ODP_PROTOCOL_OPENDAP
    else:
        protocol = _ODP_PROTOCOL_HTTP
    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    if not os.path.exists(local_path):
        os.makedirs(local_path)
    selected_file_list = self._find_files(time_range)
    if protocol == _ODP_PROTOCOL_OPENDAP:
        files = self._get_urls_list(selected_file_list, protocol)
        monitor.start('Sync ' + self.name, total_work=len(files))
        for idx, dataset_uri in enumerate(files):
            child_monitor = monitor.child(work=1)
            file_name = os.path.basename(dataset_uri)
            local_filepath = os.path.join(local_path, file_name)
            # File records carry (name, start, end, ...) — coverage comes
            # from positions 1 and 2.
            time_coverage_start = selected_file_list[idx][1]
            time_coverage_end = selected_file_list[idx][2]
            remote_netcdf = None
            local_netcdf = None
            try:
                remote_netcdf = NetCDF4DataStore(dataset_uri)
                local_netcdf = NetCDF4DataStore(local_filepath,
                                                mode='w',
                                                persist=True)
                local_netcdf.set_attributes(remote_netcdf.get_attrs())
                remote_dataset = xr.Dataset.load_store(remote_netcdf)
                process_region = False
                if region:
                    # Derive the index window for the region from the
                    # geospatial_* global attributes.
                    geo_lat_min = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lat_min')
                    geo_lat_max = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lat_max')
                    geo_lon_min = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lon_min')
                    geo_lon_max = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lon_max')
                    # NOTE(review): lat resolution is read from the *lon*
                    # attribute and vice versa — looks swapped; harmless only
                    # if both resolutions are equal. Confirm.
                    geo_lat_res = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lon_resolution')
                    geo_lon_res = self._get_harmonized_coordinate_value(
                        remote_dataset.attrs, 'geospatial_lat_resolution')
                    if not (isnan(geo_lat_min) or isnan(geo_lat_max)
                            or isnan(geo_lon_min) or isnan(geo_lon_max)
                            or isnan(geo_lat_res) or isnan(geo_lon_res)):
                        process_region = True
                        [lat_min, lon_min, lat_max, lon_max] = region.bounds
                        # Convert degree bounds to index bounds, widening
                        # outward (floor/ceil).
                        lat_min = floor(
                            (lat_min - geo_lat_min) / geo_lat_res)
                        lat_max = ceil(
                            (lat_max - geo_lat_min) / geo_lat_res)
                        lon_min = floor(
                            (lon_min - geo_lon_min) / geo_lon_res)
                        lon_max = ceil(
                            (lon_max - geo_lon_min) / geo_lon_res)
                        # TODO (kbernat): check why dataset.sel fails!
                        remote_dataset = remote_dataset.isel(
                            drop=False,
                            lat=slice(lat_min, lat_max),
                            lon=slice(lon_min, lon_max))
                        # Recompute the geographic bounds of the cropped grid
                        # for the output attributes.
                        geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                        geo_lat_min += lat_min * geo_lat_res
                        geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                        geo_lon_min += lon_min * geo_lon_res
                # Default to all variables; coordinates are always carried
                # along. NOTE(review): `var_names` persists (and grows)
                # across loop iterations.
                if not var_names:
                    var_names = [
                        var_name
                        for var_name in remote_netcdf.variables.keys()
                    ]
                var_names.extend([
                    coord_name
                    for coord_name in remote_dataset.coords.keys()
                    if coord_name not in var_names
                ])
                child_monitor.start(label=file_name,
                                    total_work=len(var_names))
                # Write one variable at a time to bound memory use.
                for sel_var_name in var_names:
                    var_dataset = remote_dataset.drop([
                        var_name
                        for var_name in remote_dataset.variables.keys()
                        if var_name != sel_var_name
                    ])
                    if compression_enabled:
                        var_dataset.variables.get(
                            sel_var_name).encoding.update(encoding_update)
                    local_netcdf.store_dataset(var_dataset)
                    child_monitor.progress(work=1, msg=sel_var_name)
                if process_region:
                    local_netcdf.set_attribute('geospatial_lat_min',
                                               geo_lat_min)
                    local_netcdf.set_attribute('geospatial_lat_max',
                                               geo_lat_max)
                    local_netcdf.set_attribute('geospatial_lon_min',
                                               geo_lon_min)
                    local_netcdf.set_attribute('geospatial_lon_max',
                                               geo_lon_max)
            finally:
                # Always release both stores; register the dataset only if
                # the local store was actually created.
                if remote_netcdf:
                    remote_netcdf.close()
                if local_netcdf:
                    local_netcdf.close()
                    local_ds.add_dataset(
                        os.path.join(local_id, file_name),
                        (time_coverage_start, time_coverage_end))
            child_monitor.done()
    else:
        # HTTP path: download whole files, skipping up-to-date local copies.
        outdated_file_list = []
        for file_rec in selected_file_list:
            filename, _, _, file_size, url = file_rec
            dataset_file = os.path.join(local_path, filename)
            # todo (forman, 20160915): must perform better checks on dataset_file if it is...
            # ... outdated or incomplete or corrupted.
            # JSON also includes "checksum" and "checksum_type" fields.
            if not os.path.isfile(dataset_file) or (
                    file_size
                    and os.path.getsize(dataset_file) != file_size):
                outdated_file_list.append(file_rec)
        if outdated_file_list:
            with monitor.starting('Sync ' + self.name,
                                  len(outdated_file_list)):
                bytes_to_download = sum(
                    [file_rec[3] for file_rec in outdated_file_list])
                dl_stat = _DownloadStatistics(bytes_to_download)
                file_number = 1
                for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                    if monitor.is_cancelled():
                        raise InterruptedError
                    dataset_file = os.path.join(local_path, filename)
                    sub_monitor = monitor.child(work=1.0)

                    # Progress callback for urlretrieve; a fresh closure is
                    # created per file, so it binds this iteration's
                    # sub_monitor. Also checks for cancellation mid-download.
                    # noinspection PyUnusedLocal
                    def reporthook(block_number, read_size, total_file_size):
                        dl_stat.handle_chunk(read_size)
                        if monitor.is_cancelled():
                            raise InterruptedError
                        sub_monitor.progress(work=read_size, msg=str(dl_stat))

                    sub_monitor_msg = "file %d of %d" % (
                        file_number, len(outdated_file_list))
                    with sub_monitor.starting(sub_monitor_msg, file_size):
                        urllib.request.urlretrieve(url[protocol],
                                                   filename=dataset_file,
                                                   reporthook=reporthook)
                    file_number += 1
                    local_ds.add_dataset(os.path.join(local_id, filename),
                                         (coverage_from, coverage_to))
    local_ds.save()
    monitor.done()
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
            lat_points, lon_points):
    """Download GFS 0.25-degree forecast data and interpolate it to points.

    Pulls `gfs.0p25` GRIB2 files from the RDA THREDDS server (dataset ds084.1)
    for the requested period, subsets them to the given lat/lon box, combines
    them and interpolates to the requested time/lat/lon points. Dates before
    the product's start (2015-01-15) fall back to the 0.50-degree product.

    :param date_lo: start datetime of the requested period
    :param date_hi: end datetime of the requested period
    :param lat_lo: southern latitude bound
    :param lat_hi: northern latitude bound
    :param lon_lo: western longitude bound
    :param lon_hi: eastern longitude bound
    :param time_points: time coordinates to interpolate to
    :param lat_points: latitude coordinates to interpolate to
    :param lon_points: longitude coordinates to interpolate to
        (assumed to be in [-180, 180] — TODO confirm against callers)
    :return: pandas DataFrame with the GFS_25_VAR_LIST columns
    """
    logger.debug(
        'obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
            str(lon_hi)))
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)
    # consider the supported time range
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                          time_points, lat_points, lon_points)
    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    # calculate a day prior for midnight interpolation
    http_util.session_manager.set_session_options(
        auth=(config['UN_RDA'], config['PW_RDA']))
    # Previous day's 18z cycle, 6-hour forecast, covers the first midnight.
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
        base_url, start_date.year, start_date.year, start_date.month,
        start_date.day))
    ds_subset = start_cat.datasets[
        'gfs.0p25.%s%.2d%.2d18.f006.grib2' % (
            start_date.year, start_date.month, start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi,
        west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    # Some files name the time coordinate 'time1'.
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)
    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month, date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
            base_url, end_date.year, end_date.year, end_date.month,
            end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in list(end_cat.datasets):
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi,
                        west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # Convert longitudes from [-180, 180] to the dataset's [0, 360) convention.
    # BUG FIX: the previous expression `((lon_points + 180) % 360) + 180`
    # mapped eastern-hemisphere longitudes (0..180) above 360, outside the
    # grid, so interpolation returned NaN for them.
    lon_points = lon_points % 360
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points,
                         latitude=lat_points,
                         time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
def _make_local(self,
                local_ds: LocalDataSource,
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    """Create a local copy of this ODP data source under ``local_ds``.

    Uses OPENDAP (filtered rewrite via ``subset_spatial_impl`` / variable
    drop) when a region or variable subset is requested, otherwise downloads
    whole files over HTTP. Also maintains the local data source's meta info
    (bbox, variables, verified temporal coverage) and saves it at the end.

    :param local_ds: the local data source receiving the datasets
    :param time_range: optional time filter used to select remote files
    :param region: optional spatial filter (polygon)
    :param var_names: optional list of variable names to keep
    :param monitor: progress monitor
    :raises DataAccessError: when no files match, or when copying fails
    """
    local_id = local_ds.id
    time_range = TimeRangeLike.convert(time_range)
    region = PolygonLike.convert(region)
    var_names = VarNamesLike.convert(var_names)
    # Apply source-specific workarounds to the requested filters.
    time_range, region, var_names = self._apply_make_local_fixes(
        time_range, region, var_names)
    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                         NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False
    # Latches for one-time meta-info updates during the copy loop.
    do_update_of_verified_time_coverage_start_once = True
    verified_time_coverage_start = None
    verified_time_coverage_end = None
    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({'zlib': True, 'complevel': compression_level})
    # Filtering requires OPENDAP access; otherwise plain HTTP download.
    if region or var_names:
        protocol = _ODP_PROTOCOL_OPENDAP
    else:
        protocol = _ODP_PROTOCOL_HTTP
    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    if not os.path.exists(local_path):
        os.makedirs(local_path)
    selected_file_list = self._find_files(time_range)
    if not selected_file_list:
        msg = 'Open Data Portal\'s data source \'{}\' does not seem to have any data sets'.format(self.id)
        if time_range is not None:
            msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
        raise DataAccessError(None, msg)
    try:
        if protocol == _ODP_PROTOCOL_OPENDAP:
            do_update_of_variables_meta_info_once = True
            do_update_of_region_meta_info_once = True
            files = self._get_urls_list(selected_file_list, protocol)
            monitor.start('Sync ' + self.id, total_work=len(files))
            for idx, dataset_uri in enumerate(files):
                child_monitor = monitor.child(work=1)
                file_name = os.path.basename(dataset_uri)
                local_filepath = os.path.join(local_path, file_name)
                # File records carry (name, start, end, ...).
                time_coverage_start = selected_file_list[idx][1]
                time_coverage_end = selected_file_list[idx][2]
                remote_netcdf = None
                local_netcdf = None
                try:
                    child_monitor.start(label=file_name, total_work=1)
                    remote_netcdf = NetCDF4DataStore(dataset_uri)
                    local_netcdf = NetCDF4DataStore(local_filepath,
                                                    mode='w',
                                                    persist=True)
                    local_netcdf.set_attributes(remote_netcdf.get_attrs())
                    remote_dataset = xr.Dataset.load_store(remote_netcdf)
                    # Keep only the requested data variables (coords remain).
                    if var_names:
                        remote_dataset = remote_dataset.drop(
                            [var_name
                             for var_name in remote_dataset.data_vars.keys()
                             if var_name not in var_names])
                    if region:
                        # Spatial subset plus bbox attributes on the output
                        # file; the local data source's bbox meta info is
                        # written once, for the first file.
                        remote_dataset = subset_spatial_impl(remote_dataset, region)
                        geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds
                        local_netcdf.set_attribute('geospatial_lat_min', geo_lat_min)
                        local_netcdf.set_attribute('geospatial_lat_max', geo_lat_max)
                        local_netcdf.set_attribute('geospatial_lon_min', geo_lon_min)
                        local_netcdf.set_attribute('geospatial_lon_max', geo_lon_max)
                        if do_update_of_region_meta_info_once:
                            local_ds.meta_info['bbox_maxx'] = geo_lon_max
                            local_ds.meta_info['bbox_minx'] = geo_lon_min
                            local_ds.meta_info['bbox_maxy'] = geo_lat_max
                            local_ds.meta_info['bbox_miny'] = geo_lat_min
                            do_update_of_region_meta_info_once = False
                    if compression_enabled:
                        for sel_var_name in remote_dataset.variables.keys():
                            remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)
                    local_netcdf.store_dataset(remote_dataset)
                    child_monitor.progress(work=1, msg=str(time_coverage_start))
                finally:
                    # Always release the stores; prune the variables meta
                    # info to what actually landed in the local file (once).
                    if remote_netcdf:
                        remote_netcdf.close()
                    if do_update_of_variables_meta_info_once:
                        variables_info = local_ds.meta_info.get('variables', [])
                        local_ds.meta_info['variables'] = [
                            var_info for var_info in variables_info
                            if var_info.get('name') in local_netcdf.variables.keys()
                            and var_info.get('name') not in local_netcdf.dimensions.keys()]
                        do_update_of_variables_meta_info_once = False
                    if local_netcdf:
                        local_netcdf.close()
                        local_ds.add_dataset(
                            os.path.join(local_id, file_name),
                            (time_coverage_start, time_coverage_end))
                    # Track the verified coverage window across files.
                    if do_update_of_verified_time_coverage_start_once:
                        verified_time_coverage_start = time_coverage_start
                        do_update_of_verified_time_coverage_start_once = False
                    verified_time_coverage_end = time_coverage_end
                child_monitor.done()
        else:
            # HTTP path: download whole files, skipping up-to-date copies.
            outdated_file_list = []
            for file_rec in selected_file_list:
                filename, _, _, file_size, url = file_rec
                dataset_file = os.path.join(local_path, filename)
                # todo (forman, 20160915): must perform better checks on dataset_file if it is...
                # ... outdated or incomplete or corrupted.
                # JSON also includes "checksum" and "checksum_type" fields.
                if not os.path.isfile(dataset_file) or (
                        file_size
                        and os.path.getsize(dataset_file) != file_size):
                    outdated_file_list.append(file_rec)
            if outdated_file_list:
                with monitor.starting('Sync ' + self.id, len(outdated_file_list)):
                    bytes_to_download = sum(
                        [file_rec[3] for file_rec in outdated_file_list])
                    dl_stat = _DownloadStatistics(bytes_to_download)
                    file_number = 1
                    for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                        dataset_file = os.path.join(local_path, filename)
                        sub_monitor = monitor.child(work=1.0)

                        # Progress callback for urlretrieve; a fresh closure
                        # is created per file, binding this iteration's
                        # sub_monitor.
                        # noinspection PyUnusedLocal
                        def reporthook(block_number, read_size, total_file_size):
                            dl_stat.handle_chunk(read_size)
                            sub_monitor.progress(work=read_size, msg=str(dl_stat))

                        sub_monitor_msg = "file %d of %d" % (
                            file_number, len(outdated_file_list))
                        with sub_monitor.starting(sub_monitor_msg, file_size):
                            urllib.request.urlretrieve(url[protocol],
                                                       filename=dataset_file,
                                                       reporthook=reporthook)
                        file_number += 1
                        local_ds.add_dataset(os.path.join(local_id, filename),
                                             (coverage_from, coverage_to))
                        if do_update_of_verified_time_coverage_start_once:
                            verified_time_coverage_start = coverage_from
                            do_update_of_verified_time_coverage_start_once = False
                        verified_time_coverage_end = coverage_to
    except OSError as error:
        # NOTE(review): constructed as DataAccessError(self, msg) here but
        # DataAccessError(None, msg) above — confirm the intended signature.
        raise DataAccessError(self, "Copying remote datasource failed, {}".format(error))
    local_ds.meta_info['temporal_coverage_start'] = TimeLike.format(verified_time_coverage_start)
    local_ds.meta_info['temporal_coverage_end'] = TimeLike.format(verified_time_coverage_end)
    local_ds.save(True)