def ingest_netcdf_cds(internet_source, downloaded_file, processed_item):
    ingestion_status = False
    try:
        product = {"productcode": internet_source.productcode,
                   "version": internet_source.version}
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_source.internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts
        sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                                   datasource_descr.datasource_descr_id)
        # The date is the part of processed_item before the first ':'
        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             processed_item.split(':')[0],
                                                             product, sub_datasource,
                                                             datasource_descr, logger)
    except Exception as inst:
        logger.error("Error in CDS ingestion of %s: %s" % (internet_source, inst))
        raise
    return ingestion_status
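# A minimal usage sketch for ingest_netcdf_cds (hypothetical values: the real
# internet_source is a datasource row carrying at least .productcode, .version
# and .internet_id, as used above; the file path and processed_item below are
# made up for illustration):
#
#   status = ingest_netcdf_cds(internet_source,
#                              downloaded_file='/tmp/20210101_sst_monthly_average.nc',
#                              processed_item='20210101:dummy')
#
# processed_item is expected in '<date>:<...>' form, since only the part before
# the first ':' is passed on as the ingestion date.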
def process_list_matching_url(datasource_descr, product, subproducts, dates):
    # Read the CDS/IRI parameters from the datasource file and build the http url
    tmpdir = tempfile.mkdtemp(prefix=__name__,
                              suffix='_' + datasource_descr.datasource_descr_id,
                              dir=es_constants.base_tmp_dir)
    parameter = read_parameter_file(datasource_descr.datasource_descr_id)
    internet_url = datasource_descr.url
    parameter_url = build_parameter_http(parameter)

    # Restore the list of already processed URLs from its JSON backing file
    processed_list = []
    processed_list_filename = es_constants.get_datastore_processed_list_prefix + \
        datasource_descr.datasource_descr_id.replace(":", "_") + '.list'
    processed_list = functions.restore_obj_from_json(processed_list, processed_list_filename)

    for date in dates:
        time_url = manage_IRI_time(date, datasource_descr.frequency_id)
        # Skip URLs that have already been processed
        if not check_processed_list(parameter_url + time_url, processed_list):
            continue
        # Manage dates depending on the datasource type TODO
        in_date = date.strftime("%Y%m%d")
        downloaded_file = tmpdir + '/' + in_date + '_' + product['productcode'] + '.nc'
        # Note: requires urllib.parse (Python 3) to be imported at module level
        file_downloaded = get_file(download_url=internet_url + parameter_url +
                                   urllib.parse.quote(time_url) + '/data.nc',
                                   target_path=downloaded_file)
        if not file_downloaded:
            logger.error('Error in downloading the file')
            continue
        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date,
                                                             product, subproducts,
                                                             datasource_descr, logger)
        processed_list.append(parameter_url + time_url)
        functions.dump_obj_to_json(processed_list, processed_list_filename)

    shutil.rmtree(tmpdir)
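# The processed-list bookkeeping above follows a restore/append/dump pattern:
# restore the JSON-backed list once, skip entries already in it, and dump the
# list after every successful ingestion so an interrupted run loses at most
# the current item. A standalone sketch of the same pattern (hypothetical
# names: urls, process_one, and the list file path):
#
#   done = functions.restore_obj_from_json([], '/tmp/example_processed.list')
#   for url in urls:
#       if url in done:
#           continue
#       process_one(url)
#       done.append(url)
#       functions.dump_obj_to_json(done, '/tmp/example_processed.list')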
def debug_CDS_MSLP_hour_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:MSLP:HOUR"
    product = {"productcode": "era5-hourly-mslp", "version": "1.0"}
    downloaded_file = '/data/processing/era5-hourly-mslp/1.0/archive/202101010100_reanalysis-era5-single-levels_reanalysis_mean_sea_level_pressure.nc'
    in_date = '202101010000'
    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                    source_id=internet_id)
    datasource_descr = datasource_descr[0]
    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                               datasource_descr.datasource_descr_id)
    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product,
                                                         sub_datasource, datasource_descr,
                                                         logger, test_mode=True)
def debug_CDS_SST_MONTH_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:SST:MONTH"
    product = {"productcode": "era5-monthly-sst", "version": "1.0"}
    downloaded_file = '/data/ingest/20210101_sst_monthly_average.nc'
    in_date = '202101010000'
    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                    source_id=internet_id)
    datasource_descr = datasource_descr[0]
    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                               datasource_descr.datasource_descr_id)
    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product,
                                                         sub_datasource, datasource_descr,
                                                         logger, test_mode=True)
def debug_IRI_PRCP_1Month_ingest_netcdf(self):
    internet_id = "IRI:NOAA:PRCP:MONTH"
    product = {"productcode": "iri_prcp", "version": "1.0"}
    downloaded_file = '/data/processing/iri_prcp/1.0/archive/20210101-iri_prcp.nc'
    in_date = '20210101'
    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                    source_id=internet_id)
    datasource_descr = datasource_descr[0]
    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                               datasource_descr.datasource_descr_id)
    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product,
                                                         sub_datasource, datasource_descr,
                                                         logger, test_mode=True)
def debug_CDS_RFE_DAY_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:RFE:DAY"
    product = {"productcode": "era5-rfe", "version": "1.0"}
    downloaded_file = '/data/ingest/202103200000_reanalysis-era5-single-levels_reanalysis_total_precipitation.nc'
    in_date = '202103200000'
    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                    source_id=internet_id)
    datasource_descr = datasource_descr[0]
    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                               datasource_descr.datasource_descr_id)
    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product,
                                                         sub_datasource, datasource_descr,
                                                         logger, test_mode=True)
def debug_IRI_surfacetemp_1Month_ingest_netcdf(self):
    internet_id = "IRI:NOAA:SURFACETEMP:MONTH"
    product = {"productcode": "iri-surface-temp", "version": "1.0"}
    downloaded_file = '/tmp/climatestation/surface_temp_Jan_2020.nc'
    in_date = '20200101'
    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                    source_id=internet_id)
    datasource_descr = datasource_descr[0]
    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product,
                                                               datasource_descr.datasource_descr_id)
    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product,
                                                         sub_datasource, datasource_descr,
                                                         logger, test_mode=True)